/**
    Computes right and/or left eigenvectors of the real upper
    quasi-triangular matrix T. A non-zero sub-diagonal entry T(j+1,j) marks
    a 2x2 diagonal block, i.e. a complex conjugate pair of eigenvalues; such
    an eigenvector occupies two consecutive columns (real part, then
    imaginary part).

    @param[in]     side     MagmaRight, MagmaLeft or MagmaBothSides: which
                            eigenvectors to compute.
    @param[in]     howmany  MagmaAllVec:       all eigenvectors, stored in VR / VL.
                            MagmaBacktransVec: all eigenvectors, each multiplied
                                               by the matrix supplied in VR / VL,
                                               which is overwritten.
                            MagmaSomeVec:      only those flagged in select.
    @param[in,out] select   Array of n flags, used only for MagmaSomeVec.
                            Standardized on exit: for a conjugate pair (j,j+1)
                            with either flag set, select[j] becomes true and
                            select[j+1] false.
    @param[in]     n        Order of T. n >= 0.
    @param[in]     T        n-by-n matrix, leading dimension ldt >= max(1,n).
    @param[in,out] VL       Left eigenvectors; ldvl >= 1, and >= n if left
                            vectors are requested.
    @param[in,out] VR       Right eigenvectors; ldvr >= 1, and >= n if right
                            vectors are requested.
    @param[in]     mm       Number of columns available in VL / VR; mm >= *mout.
    @param[out]    mout     Number of columns needed to store the requested
                            eigenvectors (n unless MagmaSomeVec).
    @param[out]    work     Workspace of lwork floats, viewed as n-row columns:
                            column 0 holds the 1-norms of the strictly upper
                            triangular columns of T, the rest hold solution
                            vectors.
    @param[in]     lwork    lwork >= max(1,3*n). When back-transforming and
                            lwork >= n + 2*n*nbmin, a blocked (GEMM)
                            back-transform is used.
    @param[out]    info     0 on success; -i if the i-th argument was illegal
                            (-1 side, -2 howmany, -4 n, -6 ldt, -8 ldvl,
                            -10 ldvr, -11 mm, -14 lwork).

    @return *info.

    Every stored vector is scaled so that its component of largest
    magnitude (|re| + |im| for complex vectors) equals 1.
*/
magma_int_t magma_strevc3(
    magma_side_t side, magma_vec_t howmany,
    magma_int_t *select,  // logical in fortran
    magma_int_t n,
    float *T,  magma_int_t ldt,
    float *VL, magma_int_t ldvl,
    float *VR, magma_int_t ldvr,
    magma_int_t mm, magma_int_t *mout,
    float *work, magma_int_t lwork,
    #ifdef COMPLEX
    float *rwork,
    #endif
    magma_int_t *info )
{
    #define  T(i,j)  ( T + (i) + (j)*ldt )
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    #define  X(i,j)  (X + (i)-1 + ((j)-1)*2)  // still as 1-based indices
    #define work(i,j) (work + (i) + (j)*n)

    // constants
    const magma_int_t ione = 1;
    const float c_zero = 0;
    const float c_one  = 1;
    const magma_int_t nbmin = 16, nbmax = 256;

    // .. Local Scalars ..
    magma_int_t allv, bothv, leftv, over, pair, rightv, somev;
    magma_int_t i, ierr, ii, ip, is, j, k, ki, ki2, iv, n2, nb, nb2, version;
    // remax is carried from the first to the second column of a conjugate
    // pair in the blocked normalization; give it a defined starting value.
    float emax, remax = c_one;

    // .. Local Arrays ..
    // since iv is a 1-based index, allocate one extra here
    magma_int_t iscomplex[ nbmax+1 ];

    // Decode and test the input parameters
    bothv  = (side == MagmaBothSides);
    rightv = (side == MagmaRight) || bothv;
    leftv  = (side == MagmaLeft ) || bothv;

    allv  = (howmany == MagmaAllVec);
    over  = (howmany == MagmaBacktransVec);
    somev = (howmany == MagmaSomeVec);

    *info = 0;
    if ( ! rightv && ! leftv )
        *info = -1;
    else if ( ! allv && ! over && ! somev )
        *info = -2;
    else if ( n < 0 )
        *info = -4;
    else if ( ldt < max( 1, n ) )
        *info = -6;
    else if ( ldvl < 1 || ( leftv && ldvl < n ) )
        *info = -8;
    else if ( ldvr < 1 || ( rightv && ldvr < n ) )
        *info = -10;
    else if ( lwork < max( 1, 3*n ) )
        *info = -14;
    else {
        // Set mout to the number of columns required to store the selected
        // eigenvectors, standardize the array select if necessary, and
        // test mm.
        if ( somev ) {
            *mout = 0;
            pair = false;
            for( j=0; j < n; ++j ) {
                if ( pair ) {
                    // second column of a pair: represented by select[j-1]
                    pair = false;
                    select[j] = false;
                }
                else {
                    if ( j < n-1 ) {
                        if ( *T(j+1,j) == c_zero ) {
                            if ( select[j] ) {
                                *mout += 1;
                            }
                        }
                        else {
                            pair = true;
                            if ( select[j] || select[j+1] ) {
                                select[j] = true;
                                *mout += 2;
                            }
                        }
                    }
                    else if ( select[n-1] ) {
                        *mout += 1;
                    }
                }
            }
        }
        else {
            *mout = n;
        }
        if ( mm < *mout ) {
            *info = -11;
        }
    }

    if ( *info != 0 ) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    // Quick return if possible.
    if ( n == 0 ) {
        return *info;
    }

    // Use blocked version (2) of the back-transform if sufficient workspace.
    // Requires 1 vector for 1-norms, and 2*nb vectors for x and Q*x.
    // Zero-out the workspace to avoid potential NaN propagation.
    //
    // The blocked code below multiplies by VR / VL and overwrites them, so
    // it is only valid when back-transforming (over). Without the "over"
    // test, MagmaAllVec / MagmaSomeVec with a large workspace would run the
    // GEMM on the partially written output and clobber the vectors already
    // stored there.
    nb = 2;
    if ( over && lwork >= n + 2*n*nbmin ) {
        version = 2;
        nb = (lwork - n) / (2*n);
        nb = min( nb, nbmax );
        nb2 = 1 + 2*nb;
        lapackf77_slaset( "F", &n, &nb2, &c_zero, &c_zero, work, &n );
    }
    else {
        version = 1;
    }

    // Compute 1-norm of each column of strictly upper triangular
    // part of T to control overflow in triangular solver.
    *work(0,0) = c_zero;
    for( j=1; j < n; ++j ) {
        *work(j,0) = c_zero;
        for( i=0; i < j; ++i ) {
            *work(j,0) += fabsf( *T(i,j) );
        }
    }

    magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0;
    timer_start( time_total );

    // Index ip is used to specify the real or complex eigenvalue:
    // ip =  0, real eigenvalue (wr),
    //    =  1, first  of conjugate complex pair: (wr,wi)
    //    = -1, second of conjugate complex pair: (wr,wi)
    // iscomplex array stores ip for each column in current block.
    if ( rightv ) {
        // ============================================================
        // Compute right eigenvectors.
        // iv is index of column in current block (1-based).
        // For complex right vector, uses iv-1 for real part and iv for complex part.
        // Non-blocked version always uses iv=2;
        // blocked     version starts with iv=nb, goes down to 1 or 2.
        // (Note the "0-th" column is used for 1-norms computed above.)
        iv = 2;
        if ( version == 2 ) {
            iv = nb;
        }

        timer_start( time_trsv );
        ip = 0;
        is = *mout - 1;
        for( ki=n-1; ki >= 0; --ki ) {
            if ( ip == -1 ) {
                // previous iteration (ki+1) was second of conjugate pair,
                // so this ki is first of conjugate pair; skip to end of loop
                ip = 1;
                continue;
            }
            else if ( ki == 0 ) {
                // last column, so this ki must be real eigenvalue
                ip = 0;
            }
            else if ( *T(ki,ki-1) == c_zero ) {
                // zero on sub-diagonal, so this ki is real eigenvalue
                ip = 0;
            }
            else {
                // non-zero on sub-diagonal, so this ki is second of conjugate pair
                ip = -1;
            }

            if ( somev ) {
                if ( ip == 0 ) {
                    if ( ! select[ki] ) {
                        continue;
                    }
                }
                else {
                    // a pair is flagged on its first column
                    if ( ! select[ki-1] ) {
                        continue;
                    }
                }
            }

            if ( ip == 0 ) {
                // ------------------------------------------------------------
                // Real right eigenvector
                // Solve upper quasi-triangular system:
                // [ T(0:ki-1,0:ki-1) - wr ]*X = -T(0:ki-1,ki)
                magma_slaqtrsd( MagmaNoTrans, ki+1, T(0,0), ldt,
                                work(0,iv), n, work(0,0), &ierr );

                // Copy the vector x or Q*x to VR and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VR and normalize.
                    n2 = ki+1;
                    blasf77_scopy( &n2, work(0,iv), &ione, VR(0,is), &ione );

                    ii = blasf77_isamax( &n2, VR(0,is), &ione ) - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VR(ii,is) );
                    blasf77_sscal( &n2, &remax, VR(0,is), &ione );

                    for( k=ki + 1; k < n; ++k ) {
                        *VR(k,is) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemv );
                    if ( ki > 0 ) {
                        n2 = ki;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VR, &ldvr,
                                       work(0, iv), &ione,
                                       work(ki,iv), VR(0,ki), &ione );
                    }
                    time_gemv_sum += timer_stop( time_gemv );

                    ii = blasf77_isamax( &n, VR(0,ki), &ione ) - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VR(ii,ki) );
                    blasf77_sscal( &n, &remax, VR(0,ki), &ione );
                    timer_start( time_trsv );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out below vector
                    for( k=ki + 1; k < n; ++k ) {
                        *work(k,iv) = c_zero;
                    }
                    iscomplex[ iv ] = ip;
                    // back-transform and normalization is done below
                }
            }  // end real eigenvector
            else {
                // ------------------------------------------------------------
                // Complex right eigenvector
                // Solve upper quasi-triangular system:
                // [ T(0:ki-2,0:ki-2) - (wr+i*wi) ]*x = u
                magma_slaqtrsd( MagmaNoTrans, ki+1, T(0,0), ldt,
                                work(0,iv-1), n, work(0,0), &ierr );

                // Copy the vector x or Q*x to VR and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VR and normalize.
                    n2 = ki+1;
                    blasf77_scopy( &n2, work(0,iv-1), &ione, VR(0,is-1), &ione );
                    blasf77_scopy( &n2, work(0,iv  ), &ione, VR(0,is  ), &ione );

                    emax = c_zero;
                    for( k=0; k <= ki; ++k ) {
                        emax = max( emax, fabsf(*VR(k,is-1)) + fabsf(*VR(k,is)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n2, &remax, VR(0,is-1), &ione );
                    blasf77_sscal( &n2, &remax, VR(0,is  ), &ione );

                    for( k=ki + 1; k < n; ++k ) {
                        *VR(k,is-1) = c_zero;
                        *VR(k,is  ) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemv );
                    if ( ki > 1 ) {
                        n2 = ki-1;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VR, &ldvr,
                                       work(0,   iv-1), &ione,
                                       work(ki-1,iv-1), VR(0,ki-1), &ione );
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VR, &ldvr,
                                       work(0, iv), &ione,
                                       work(ki,iv), VR(0,ki), &ione );
                    }
                    else {
                        blasf77_sscal( &n, work(ki-1,iv-1), VR(0,ki-1), &ione );
                        blasf77_sscal( &n, work(ki,  iv  ), VR(0,ki  ), &ione );
                    }
                    time_gemv_sum += timer_stop( time_gemv );

                    emax = c_zero;
                    for( k=0; k < n; ++k ) {
                        emax = max( emax, fabsf(*VR(k,ki-1)) + fabsf(*VR(k,ki)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n, &remax, VR(0,ki-1), &ione );
                    blasf77_sscal( &n, &remax, VR(0,ki  ), &ione );
                    timer_start( time_trsv );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out below vector
                    for( k=ki + 1; k < n; ++k ) {
                        *work(k,iv-1) = c_zero;
                        *work(k,iv  ) = c_zero;
                    }
                    iscomplex[ iv-1 ] = -ip;
                    iscomplex[ iv   ] =  ip;
                    iv -= 1;
                    // back-transform and normalization is done below
                }
            }  // end real or complex vector

            if ( version == 2 ) {
                // ------------------------------------------------------------
                // Blocked version of back-transform
                // For complex case, ki2 includes both vectors (ki-1 and ki)
                if ( ip == 0 ) {
                    ki2 = ki;
                }
                else {
                    ki2 = ki - 1;
                }

                // Columns iv:nb of work are valid vectors.
                // When the number of vectors stored reaches nb-1 or nb,
                // or if this was last vector, do the GEMM
                if ( (iv <= 2) || (ki2 == 0) ) {
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemm );
                    nb2 = nb-iv+1;
                    n2  = ki2+nb-iv+1;
                    blasf77_sgemm( "n", "n", &n, &nb2, &n2, &c_one,
                                   VR, &ldvr,
                                   work(0,iv   ), &n, &c_zero,
                                   work(0,nb+iv), &n );
                    time_gemm_sum += timer_stop( time_gemm );

                    // normalize vectors
                    // TODO if somev, should copy vectors individually to correct location.
                    for( k=iv; k <= nb; ++k ) {
                        if ( iscomplex[k] == 0 ) {
                            // real eigenvector
                            ii = blasf77_isamax( &n, work(0,nb+k), &ione ) - 1;  // subtract 1; ii is 0-based
                            remax = c_one / fabsf( *work(ii,nb+k) );
                        }
                        else if ( iscomplex[k] == 1 ) {
                            // first eigenvector of conjugate pair
                            emax = c_zero;
                            for( ii=0; ii < n; ++ii ) {
                                emax = max( emax, fabsf( *work(ii,nb+k  ) )
                                                + fabsf( *work(ii,nb+k+1) ) );
                            }
                            remax = c_one / emax;
                        }
                        // else if iscomplex[k] == -1
                        //     second eigenvector of conjugate pair
                        //     reuse same remax as previous k
                        blasf77_sscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    nb2 = nb-iv+1;
                    lapackf77_slacpy( "F", &n, &nb2,
                                      work(0,nb+iv), &n,
                                      VR(0,ki2), &ldvr );
                    iv = nb;
                    timer_start( time_trsv );
                }
                else {
                    iv -= 1;
                }
            }  // end blocked back-transform

            is -= 1;
            if ( ip != 0 ) {
                is -= 1;
            }
        }
    }
    timer_stop( time_trsv );

    timer_stop( time_total );
    timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n",
                  time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total );

    if ( leftv ) {
        // ============================================================
        // Compute left eigenvectors.
        // iv is index of column in current block (1-based).
        // For complex left vector, uses iv for real part and iv+1 for complex part.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=1, goes up to nb-1 or nb.
        // (Note the "0-th" column is used for 1-norms computed above.)
        iv = 1;
        ip = 0;
        is = 0;
        for( ki=0; ki < n; ++ki ) {
            if ( ip == 1 ) {
                // previous iteration (ki-1) was first of conjugate pair,
                // so this ki is second of conjugate pair; skip to end of loop
                ip = -1;
                continue;
            }
            else if ( ki == n-1 ) {
                // last column, so this ki must be real eigenvalue
                ip = 0;
            }
            else if ( *T(ki+1,ki) == c_zero ) {
                // zero on sub-diagonal, so this ki is real eigenvalue
                ip = 0;
            }
            else {
                // non-zero on sub-diagonal, so this ki is first of conjugate pair
                ip = 1;
            }

            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }

            if ( ip == 0 ) {
                // ------------------------------------------------------------
                // Real left eigenvector
                // Solve transposed quasi-triangular system:
                // [ T(ki+1:n,ki+1:n) - wr ]**T * X = -T(ki+1:n,ki)
                magma_slaqtrsd( MagmaTrans, n-ki, T(ki,ki), ldt,
                                work(ki,iv), n, work(ki,0), &ierr );

                // Copy the vector x or Q*x to VL and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VL and normalize.
                    n2 = n-ki;
                    blasf77_scopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione );

                    ii = blasf77_isamax( &n2, VL(ki,is), &ione ) + ki - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VL(ii,is) );
                    blasf77_sscal( &n2, &remax, VL(ki,is), &ione );

                    for( k=0; k < ki; ++k ) {
                        *VL(k,is) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    if ( ki < n-1 ) {
                        n2 = n-ki-1;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VL(0,ki+1), &ldvl,
                                       work(ki+1,iv), &ione,
                                       work(ki,  iv), VL(0,ki), &ione );
                    }

                    ii = blasf77_isamax( &n, VL(0,ki), &ione ) - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VL(ii,ki) );
                    blasf77_sscal( &n, &remax, VL(0,ki), &ione );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out above vector
                    // could go from (ki+1)-NV+1 to ki
                    for( k=0; k < ki; ++k ) {
                        *work(k,iv) = c_zero;
                    }
                    iscomplex[ iv ] = ip;
                    // back-transform and normalization is done below
                }
            }  // end real eigenvector
            else {
                // ------------------------------------------------------------
                // Complex left eigenvector
                // Solve transposed quasi-triangular system:
                // [ T(ki+2:n,ki+2:n)**T - (wr-i*wi) ]*X = V
                magma_slaqtrsd( MagmaTrans, n-ki, T(ki,ki), ldt,
                                work(ki,iv), n, work(ki,0), &ierr );

                // Copy the vector x or Q*x to VL and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VL and normalize.
                    n2 = n-ki;
                    blasf77_scopy( &n2, work(ki,iv  ), &ione, VL(ki,is  ), &ione );
                    blasf77_scopy( &n2, work(ki,iv+1), &ione, VL(ki,is+1), &ione );

                    emax = c_zero;
                    for( k=ki; k < n; ++k ) {
                        emax = max( emax, fabsf(*VL(k,is))+ fabsf(*VL(k,is+1)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n2, &remax, VL(ki,is  ), &ione );
                    blasf77_sscal( &n2, &remax, VL(ki,is+1), &ione );

                    for( k=0; k < ki; ++k ) {
                        *VL(k,is  ) = c_zero;
                        *VL(k,is+1) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    if ( ki < n-2 ) {
                        n2 = n-ki-2;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VL(0,ki+2), &ldvl,
                                       work(ki+2,iv), &ione,
                                       work(ki,  iv), VL(0,ki), &ione );
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VL(0,ki+2), &ldvl,
                                       work(ki+2,iv+1), &ione,
                                       work(ki+1,iv+1), VL(0,ki+1), &ione );
                    }
                    else {
                        blasf77_sscal( &n, work(ki,  iv  ), VL(0, ki  ), &ione );
                        blasf77_sscal( &n, work(ki+1,iv+1), VL(0, ki+1), &ione );
                    }

                    emax = c_zero;
                    for( k=0; k < n; ++k ) {
                        emax = max( emax, fabsf(*VL(k,ki))+ fabsf(*VL(k,ki+1)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n, &remax, VL(0,ki  ), &ione );
                    blasf77_sscal( &n, &remax, VL(0,ki+1), &ione );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out above vector
                    // could go from (ki+1)-NV+1 to ki
                    for( k=0; k < ki; ++k ) {
                        *work(k,iv  ) = c_zero;
                        *work(k,iv+1) = c_zero;
                    }
                    iscomplex[ iv   ] =  ip;
                    iscomplex[ iv+1 ] = -ip;
                    iv += 1;
                    // back-transform and normalization is done below
                }
            }  // end real or complex eigenvector

            if ( version == 2 ) {
                // -------------------------------------------------
                // Blocked version of back-transform
                // For complex case, (ki2+1) includes both vectors (ki+1) and (ki+2)
                if ( ip == 0 ) {
                    ki2 = ki;
                }
                else {
                    ki2 = ki + 1;
                }

                // Columns 1:iv of work are valid vectors.
                // When the number of vectors stored reaches nb-1 or nb,
                // or if this was last vector, do the GEMM
                if ( (iv >= nb-1) || (ki2 == n-1) ) {
                    n2 = n-(ki2+1)+iv;
                    blasf77_sgemm( "n", "n", &n, &iv, &n2, &c_one,
                                   VL(0,ki2-iv+1), &ldvl,
                                   work(ki2-iv+1,1), &n, &c_zero,
                                   work(0,nb+1), &n );

                    // normalize vectors
                    for( k=1; k <= iv; ++k ) {
                        if ( iscomplex[k] == 0 ) {
                            // real eigenvector
                            ii = blasf77_isamax( &n, work(0,nb+k), &ione ) - 1;  // subtract 1; ii is 0-based
                            remax = c_one / fabsf( *work(ii,nb+k) );
                        }
                        else if ( iscomplex[k] == 1) {
                            // first eigenvector of conjugate pair
                            emax = c_zero;
                            for( ii=0; ii < n; ++ii ) {
                                emax = max( emax, fabsf( *work(ii,nb+k  ) )
                                                + fabsf( *work(ii,nb+k+1) ) );
                            }
                            remax = c_one / emax;
                        }
                        // else if iscomplex[k] == -1
                        //     second eigenvector of conjugate pair
                        //     reuse same remax as previous k
                        blasf77_sscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_slacpy( "F", &n, &iv,
                                      work(0,nb+1), &n,
                                      VL(0,ki2-iv+1), &ldvl );
                    iv = 1;
                }
                else {
                    iv += 1;
                }
            }  // blocked back-transform

            is += 1;
            if ( ip != 0 ) {
                is += 1;
            }
        }
    }

    return *info;
}  // end of STREVC3
// Euler flux of one cell along direction AxisNo, built from the cell's
// primitive variables. Component 1 is the mass flux, components
// 2..Dimension+1 the momentum fluxes (pressure added on the AxisNo
// component), component QuantityNb the energy flux.
static Vector RoeCellFlux(const Cell& cell, int AxisNo)
{
	real   density  = cell.density();
	Vector velocity = cell.velocity();
	real   energy   = cell.energy();
	real   pressure = cell.pressure();

	Vector flux(QuantityNb);

	flux.setValue(1, density*velocity.value(AxisNo));

	for (int d = 1; d <= Dimension; d++)
		flux.setValue(d+1, density*velocity.value(AxisNo)*velocity.value(d) + ((AxisNo == d)? pressure : 0.));

	flux.setValue(QuantityNb, (energy+pressure)*velocity.value(AxisNo));

	return flux;
}

// Numerical flux along AxisNo at the interface between Cell2 and Cell3,
// using the four-cell stencil Cell1..Cell4.
//
// Steps: limited left/right fluxes and conservative states are blended with
// the weights returned by Limiter(); Roe averages of velocity, enthalpy and
// sound speed are formed from the left/right states; the result is
//     0.5*(FL+FR) - 0.5*|A|*(QR-QL),   |A| = R*|Lambda|*L.
Vector SchemeRoe(const Cell& Cell1,const Cell& Cell2,const Cell& Cell3,const Cell& Cell4, int AxisNo)
{
	int n;	// loop counter

	// vector whose components are all one
	Vector Ones(QuantityNb);
	for (n = 1; n <= QuantityNb; n++)
		Ones.setValue(n, 1.);

	// --- Conservative quantities of the four cells ---

	Vector U1 = Cell1.average();
	Vector U2 = Cell2.average();
	Vector U3 = Cell3.average();
	Vector U4 = Cell4.average();

	// --- Euler fluxes of the four cells ---

	Vector G1 = RoeCellFlux(Cell1, AxisNo);
	Vector G2 = RoeCellFlux(Cell2, AxisNo);
	Vector G3 = RoeCellFlux(Cell3, AxisNo);
	Vector G4 = RoeCellFlux(Cell4, AxisNo);

	// --- Limited left state and flux ---

	Vector Weight = Limiter(U3-U2, U2-U1);
	Vector FluxLeft  = G2 + 0.5*(Weight|(G2-G1)) + 0.5*((Ones-Weight)|(G3-G2));
	Vector StateLeft = U2 + 0.5*(Weight|(U2-U1)) + 0.5*((Ones-Weight)|(U3-U2));

	// --- Limited right state and flux ---

	Weight = Limiter(U3-U2, U4-U3);
	Vector FluxRight  = G3 - 0.5*(Weight|(G4-G3)) - 0.5*((Ones-Weight)|(G3-G2));
	Vector StateRight = U3 - 0.5*(Weight|(U4-U3)) - 0.5*((Ones-Weight)|(U3-U2));

	// --- Left and right primitive variables ---

	real densLeft  = StateLeft.value(1);
	real densRight = StateRight.value(1);

	Vector velLeft(Dimension), velRight(Dimension);
	for (n = 1; n <= Dimension; n++)
	{
		velLeft.setValue(n,StateLeft.value(n+1)/densLeft);
		velRight.setValue(n,StateRight.value(n+1)/densRight);
	}

	real enerLeft  = StateLeft.value(QuantityNb);
	real enerRight = StateRight.value(QuantityNb);

	real presLeft  = (Gamma-1)*(enerLeft - .5*densLeft*(velLeft*velLeft));
	real presRight = (Gamma-1)*(enerRight - .5*densRight*(velRight*velRight));

	// --- Roe averages ---

	real ratio = sqrt(densRight/densLeft);	// weight of the right state
	real densAvg = ratio*densLeft;		// averaged density (not used below)
	Vector velAvg = 1./(1.+ratio)*( ratio*velRight + velLeft );
	real enthAvg = 1./(1.+ratio)*( ratio*(enerRight+presRight)/densRight + (enerLeft+presLeft)/densLeft );
	real soundAvg = sqrt ( (Gamma-1)*( enthAvg - 0.5*(velAvg*velAvg) ) );

	// --- Diagonal matrix of absolute eigenvalues: |u| (Dimension times), |u+c|, |u-c| ---

	Matrix AbsLambda(QuantityNb);
	for (n = 1; n <= Dimension; n++)
		AbsLambda.setValue(n,n, fabs(velAvg.value(AxisNo)));

	AbsLambda.setValue(Dimension+1, Dimension+1, fabs(velAvg.value(AxisNo)+soundAvg));
	AbsLambda.setValue(Dimension+2, Dimension+2, fabs(velAvg.value(AxisNo)-soundAvg));

	// --- Left and right eigenmatrices ---

	Matrix EigLeft, EigRight;
	EigLeft.setEigenMatrix(true, AxisNo, velAvg, soundAvg);
	EigRight.setEigenMatrix(false, AxisNo, velAvg, soundAvg, enthAvg);

	// --- Absolute Jacobian matrix ---

	Matrix AbsJacobian;
	AbsJacobian = EigRight*AbsLambda*EigLeft;

	// --- Interface flux: centred part minus upwind dissipation ---

	Vector Flux(QuantityNb);
	Flux = 0.5*(FluxLeft+FluxRight) - 0.5*(AbsJacobian*(StateRight-StateLeft));

	return Flux;
}
/* Subroutine */ int ztrsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublecomplex *work, integer *ldwork, doublereal *rwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZTRSNA estimates reciprocal condition numbers for specified eigenvalues and/or right eigenvectors of a complex upper triangular matrix T (or of any matrix Q*T*Q**H with Q unitary). Arguments ========= JOB (input) CHARACTER*1 Specifies whether condition numbers are required for eigenvalues (S) or eigenvectors (SEP): = 'E': for eigenvalues only (S); = 'V': for eigenvectors only (SEP); = 'B': for both eigenvalues and eigenvectors (S and SEP). HOWMNY (input) CHARACTER*1 = 'A': compute condition numbers for all eigenpairs; = 'S': compute condition numbers for selected eigenpairs specified by the array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenpairs for which condition numbers are required. To select condition numbers for the j-th eigenpair, SELECT(j) must be set to .TRUE.. If HOWMNY = 'A', SELECT is not referenced. N (input) INTEGER The order of the matrix T. N >= 0. T (input) COMPLEX*16 array, dimension (LDT,N) The upper triangular matrix T. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). VL (input) COMPLEX*16 array, dimension (LDVL,M) If JOB = 'E' or 'B', VL must contain left eigenvectors of T (or of any Q*T*Q**H with Q unitary), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VL, as returned by ZHSEIN or ZTREVC. If JOB = 'V', VL is not referenced. LDVL (input) INTEGER The leading dimension of the array VL. 
LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. VR (input) COMPLEX*16 array, dimension (LDVR,M) If JOB = 'E' or 'B', VR must contain right eigenvectors of T (or of any Q*T*Q**H with Q unitary), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VR, as returned by ZHSEIN or ZTREVC. If JOB = 'V', VR is not referenced. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. S (output) DOUBLE PRECISION array, dimension (MM) If JOB = 'E' or 'B', the reciprocal condition numbers of the selected eigenvalues, stored in consecutive elements of the array. Thus S(j), SEP(j), and the j-th columns of VL and VR all correspond to the same eigenpair (but not in general the j-th eigenpair, unless all eigenpairs are selected). If JOB = 'V', S is not referenced. SEP (output) DOUBLE PRECISION array, dimension (MM) If JOB = 'V' or 'B', the estimated reciprocal condition numbers of the selected eigenvectors, stored in consecutive elements of the array. If JOB = 'E', SEP is not referenced. MM (input) INTEGER The number of elements in the arrays S (if JOB = 'E' or 'B') and/or SEP (if JOB = 'V' or 'B'). MM >= M. M (output) INTEGER The number of elements of the arrays S and/or SEP actually used to store the estimated condition numbers. If HOWMNY = 'A', M is set to N. WORK (workspace) COMPLEX*16 array, dimension (LDWORK,N+1) If JOB = 'E', WORK is not referenced. LDWORK (input) INTEGER The leading dimension of the array WORK. LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. RWORK (workspace) DOUBLE PRECISION array, dimension (N) If JOB = 'E', RWORK is not referenced. 
INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The reciprocal of the condition number of an eigenvalue lambda is defined as S(lambda) = |v'*u| / (norm(u)*norm(v)) where u and v are the right and left eigenvectors of T corresponding to lambda; v' denotes the conjugate transpose of v, and norm(u) denotes the Euclidean norm. These reciprocal condition numbers always lie between zero (very badly conditioned) and one (very well conditioned). If n = 1, S(lambda) is defined to be 1. An approximate error bound for a computed eigenvalue W(i) is given by EPS * norm(T) / S(i) where EPS is the machine precision. The reciprocal of the condition number of the right eigenvector u corresponding to lambda is defined as follows. Suppose T = ( lambda c ) ( 0 T22 ) Then the reciprocal condition number is SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) where sigma-min denotes the smallest singular value. We approximate the smallest singular value by the reciprocal of an estimate of the one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is defined to be abs(T(1,1)). 
An approximate error bound for a computed right eigenvector VR(i) is given by EPS * norm(T) / SEP(i) ===================================================================== Decode and test the input parameters Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, work_dim1, work_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2; doublecomplex z__1; /* Builtin functions */ double z_abs(doublecomplex *), d_imag(doublecomplex *); /* Local variables */ static integer kase, ierr; static doublecomplex prod; static doublereal lnrm, rnrm; static integer i, j, k; static doublereal scale; extern logical lsame_(char *, char *); extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static doublecomplex dummy[1]; static logical wants; static doublereal xnorm; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_( char *); static integer ks, ix; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; static logical wantbh; extern /* Subroutine */ int zlacon_(integer *, doublecomplex *, doublecomplex *, doublereal *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); static logical somcon; extern /* Subroutine */ int zdrscl_(integer *, doublereal *, doublecomplex *, integer *); static char normin[1]; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static doublereal smlnum; static logical wantsp; extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublereal *, integer *), ztrexc_(char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, 
integer *); static doublereal eps, est; #define DUMMY(I) dummy[(I)] #define SELECT(I) select[(I)-1] #define S(I) s[(I)-1] #define SEP(I) sep[(I)-1] #define RWORK(I) rwork[(I)-1] #define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)] #define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)] #define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)] #define WORK(I,J) work[(I)-1 + ((J)-1)* ( *ldwork)] wantbh = lsame_(job, "B"); wants = lsame_(job, "E") || wantbh; wantsp = lsame_(job, "V") || wantbh; somcon = lsame_(howmny, "S"); /* Set M to the number of eigenpairs for which condition numbers are to be computed. */ if (somcon) { *m = 0; i__1 = *n; for (j = 1; j <= *n; ++j) { if (SELECT(j)) { ++(*m); } /* L10: */ } } else { *m = *n; } *info = 0; if (! wants && ! wantsp) { *info = -1; } else if (! lsame_(howmny, "A") && ! somcon) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldvl < 1 || wants && *ldvl < *n) { *info = -8; } else if (*ldvr < 1 || wants && *ldvr < *n) { *info = -10; } else if (*mm < *m) { *info = -13; } else if (*ldwork < 1 || wantsp && *ldwork < *n) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("ZTRSNA", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (somcon) { if (! SELECT(1)) { return 0; } } if (wants) { S(1) = 1.; } if (wantsp) { SEP(1) = z_abs(&T(1,1)); } return 0; } /* Get machine constants */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); ks = 1; i__1 = *n; for (k = 1; k <= *n; ++k) { if (somcon) { if (! SELECT(k)) { goto L50; } } if (wants) { /* Compute the reciprocal condition number of the k-th eigenvalue. */ zdotc_(&z__1, n, &VR(1,ks), &c__1, &VL(1,ks), &c__1); prod.r = z__1.r, prod.i = z__1.i; rnrm = dznrm2_(n, &VR(1,ks), &c__1); lnrm = dznrm2_(n, &VL(1,ks), &c__1); S(ks) = z_abs(&prod) / (rnrm * lnrm); } if (wantsp) { /* Estimate the reciprocal condition number of the k-th eigenvector. 
Copy the matrix T to the array WORK and swap the k-th diagonal element to the (1,1) position. */ zlacpy_("Full", n, n, &T(1,1), ldt, &WORK(1,1), ldwork); ztrexc_("No Q", n, &WORK(1,1), ldwork, dummy, &c__1, &k, & c__1, &ierr); /* Form C = T22 - lambda*I in WORK(2:N,2:N). */ i__2 = *n; for (i = 2; i <= *n; ++i) { i__3 = i + i * work_dim1; i__4 = i + i * work_dim1; i__5 = work_dim1 + 1; z__1.r = WORK(i,i).r - WORK(1,1).r, z__1.i = WORK(i,i).i - WORK(1,1).i; WORK(i,i).r = z__1.r, WORK(i,i).i = z__1.i; /* L20: */ } /* Estimate a lower bound for the 1-norm of inv(C'). The 1st and (N+1)th columns of WORK are used to store work ve ctors. */ SEP(ks) = 0.; est = 0.; kase = 0; *(unsigned char *)normin = 'N'; L30: i__2 = *n - 1; zlacon_(&i__2, &WORK(1,*n+1), &WORK(1,1) , &est, &kase); if (kase != 0) { if (kase == 1) { /* Solve C'*x = scale*b */ i__2 = *n - 1; zlatrs_("Upper", "Conjugate transpose", "Nonunit", normin, &i__2, &WORK(2,2), ldwork, & WORK(1,1), &scale, &RWORK(1), &ierr); } else { /* Solve C*x = scale*b */ i__2 = *n - 1; zlatrs_("Upper", "No transpose", "Nonunit", normin, &i__2, &WORK(2,2), ldwork, &WORK(1,1), &scale, &RWORK(1), &ierr); } *(unsigned char *)normin = 'Y'; if (scale != 1.) { /* Multiply by 1/SCALE if doing so will no t cause overflow. */ i__2 = *n - 1; ix = izamax_(&i__2, &WORK(1,1), &c__1); i__2 = ix + work_dim1; xnorm = (d__1 = WORK(ix,1).r, abs(d__1)) + (d__2 = d_imag( &WORK(ix,1)), abs(d__2)); if (scale < xnorm * smlnum || scale == 0.) { goto L40; } zdrscl_(n, &scale, &WORK(1,1), &c__1); } goto L30; } SEP(ks) = 1. / max(est,smlnum); } L40: ++ks; L50: ; } return 0; /* End of ZTRSNA */ } /* ztrsna_ */
// Creates two new Rhino views to act as the left and right VR eyes, sets
// them up as floating perspective views, and toggles the VR display conduit.
//
// Flow:
//   1. record the viewport ids of the views that exist now;
//   2. create two views and identify them as the views whose ids were not
//      recorded (first found = left, second = right);
//   3. name and configure both views;
//   4. if the conduit is already enabled with live windows, disable it;
//      otherwise bind it to the two views, sync them, and enable it.
//
// Returns CRhinoCommand::success, or CRhinoCommand::failure when the conduit
// must be enabled but the two new views could not be found.
CRhinoCommand::result CCommandVRCreateViews::RunCommand( const CRhinoCommandContext& context )
{
	AFX_MANAGE_STATE( ::RhinoApp().RhinoModuleState() );

	ON_wString wStr;
	wStr.Format( L"READY SET\n" );
	RhinoApp().Print( wStr );

	ON_SimpleArray<CRhinoView*> viewList;
	ON_SimpleArray<ON_UUID> viewportIds;
	CRhinoView* lView = 0;
	CRhinoView* rView = 0;
	int i = 0;

	// Record the viewport ids that exist before the new views are created.
	context.m_doc.GetViewList( viewList, true, false );
	for ( i = 0; i < viewList.Count(); i++ )
	{
		CRhinoView* existingView = viewList[i];
		if ( existingView )
			viewportIds.Append( existingView->ActiveViewportID() );
	}
	viewList.Empty(); // rebuilt below, once the new views exist

	// One view per eye.
	context.m_doc.NewView( ON_3dmView() );
	context.m_doc.NewView( ON_3dmView() );

	// The new views are those whose viewport id was not recorded above.
	context.m_doc.GetViewList( viewList, true, false );
	for ( i = 0; i < viewList.Count(); i++ )
	{
		CRhinoView* candidate = viewList[i];
		if ( !candidate )
			continue;

		// Search returns the index of the first match, or -1 if none.
		if ( viewportIds.Search( candidate->ActiveViewportID() ) >= 0 )
			continue; // this view existed before the command ran

		if ( !lView )
		{
			lView = candidate; // first new view found: left eye
		}
		else
		{
			rView = candidate; // second new view found: right eye
			break;
		}
	}

	// NOTE(review): both views are placed at the same camera location. The
	// original code also declared a separate right-eye location
	// (100.0,165.1,100.0) and a target point (0,0,0) but never used them;
	// confirm whether the right view should use its own location.
	ON_3dPoint locationL = ON_3dPoint(100.0,100.0,100.0);

	if ( lView && rView )
	{
		CRhinoView* lrViews[2] = { lView, rView };
		for ( int eye = 0; eye < 2; eye++ )
		{
			ON_3dmView onView = lrViews[eye]->ActiveViewport().View();
			if ( eye == 0 )
				onView.m_name = L"lView";
			if ( eye == 1 )
				onView.m_name = L"rView";
			lrViews[eye]->ActiveViewport().SetView( onView );
			lrViews[eye]->ActiveViewport().m_v.m_vp.ChangeToPerspectiveProjection( 50, true, 35 );
			lrViews[eye]->ActiveViewport().m_v.m_vp.SetCameraLocation( locationL );
			lrViews[eye]->FloatRhinoView( true );
			lrViews[eye]->Redraw();
		}
	}

	VR().lView = lView;
	VR().rView = rView;

	ON_wString SYNC;
	SYNC.Format( L"SYNCVRBEGIN\n" );
	RhinoApp().Print( SYNC );

	if ( vrConduit.IsEnabled()
		&& ::IsWindow( vrConduit.m_hWnd1 )
		&& ::IsWindow( vrConduit.m_hWnd2 ) )
	{
		// Conduit already running on live windows: toggle it off.
		vrConduit.m_pView1 = 0;
		vrConduit.m_pView2 = 0;
		vrConduit.Disable();
	}
	else
	{
		// Everything below dereferences both views; bail out instead of
		// crashing if either one could not be created or found.
		if ( !lView || !rView )
		{
			RhinoApp().Print( L"Unable to find the two new VR views.\n" );
			return CRhinoCommand::failure;
		}

		vrConduit.m_pView1 = lView;
		vrConduit.m_pView2 = rView;

		vrConduit.m_hWnd1 = vrConduit.m_pView1->m_hWnd;
		vrConduit.m_hWnd2 = vrConduit.m_pView2->m_hWnd;

		SyncVR( lView, rView );

		vrConduit.Bind( *lView );
		vrConduit.Bind( *rView );

		lView->Redraw();
		rView->Redraw();

		vrConduit.Enable();
	}

	return CRhinoCommand::success;
}
/*! calculate generalized eigenvalues and generalized right eigenvectors\n All of the arguments don't need to be initialized. wr, wi, vrr and vri are overwitten and become real and imaginary part of generalized eigenvalue and generalized right eigenvector, respectively. This matrix and matB are also overwritten. */ inline long dgematrix::dggev(dgematrix& matB, std::vector<double>& wr, std::vector<double>& wi, std::vector<dcovector>& vrr, std::vector<dcovector>& vri) { #ifdef CPPL_VERBOSE std::cerr << "# [MARK] dgematrix::dggev(dgematrix&, std::vector<double>&, std::vector<double>&, std::vector<dcovector>&, std::vector<dcovector>&)" << std::endl; #endif//CPPL_VERBOSE #ifdef CPPL_DEBUG if(M!=N){ std::cerr << "[ERROR] dgematrix::dggev" << "(dgematrix&, vector<double>&, vector<double>&, " << "vector<dcovector>&, vector<dcovector>&)" << std::endl << "This matrix is not a square matrix." << std::endl << "This matrix is (" << M << "x" << N << ")." << std::endl; exit(1); } if(matB.M!=N || matB.N!=N){ std::cerr << "[ERROR] dgematrix::dggev" << "(dgematrix&, vector<double>&, vector<double>&, " << "vector<dcovector>&, vector<dcovector>&)" << std::endl << "The matrix B is not a square matrix " << "having the same size as \"this\" matrix." << std::endl << "The B matrix is (" << matB.M << "x" << matB.N << ")." 
<< std::endl; exit(1); } #endif//CPPL_DEBUG wr.resize(N); wi.resize(N); vrr.resize(N); vri.resize(N); for(long i=0; i<N; i++){ vrr[i].resize(N); vri[i].resize(N); } dgematrix VR(N,N); char JOBVL('N'), JOBVR('V'); long LDA(N), LDB(N), LDVL(1), LDVR(N), LWORK(8*N), INFO(1); double *BETA(new double[N]), *VL(NULL), *WORK(new double[LWORK]); dggev_(JOBVL, JOBVR, N, Array, LDA, matB.Array, LDB, &wr[0], &wi[0], BETA, VL, LDVL, VR.Array, LDVR, WORK, LWORK, INFO); delete [] WORK; delete [] VL; //// reforming //// for(long i=0; i<N; i++){ wr[i]/=BETA[i]; wi[i]/=BETA[i]; } delete [] BETA; //// forming //// for(long j=0; j<N; j++){ if(fabs(wi[j])<1e-10){ for(long i=0; i<N; i++){ vrr[j](i) = VR(i,j); vri[j](i) = 0.0; } } else{ for(long i=0; i<N; i++){ vrr[j](i) = VR(i,j); vri[j](i) = VR(i,j+1); vrr[j+1](i) = VR(i,j); vri[j+1](i) =-VR(i,j+1); } j++; } } if(INFO!=0){ std::cerr << "[WARNING] dgematrix::dggev" << "(dgematrix&, vector<double>&, vector<double>&, " << "vector<dcovector>&, vector<dcovector>&)" << std::endl << "Serious trouble happend. INFO = " << INFO << "." << std::endl; } return INFO; }
/**
    Purpose
    -------
    DGEEV computes for an N-by-N real nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**T * A = lambda(j) * u(j)**T
    where u(j)**T denotes the transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec: left eigenvectors of A are not computed;
      -     = MagmaVec:   left eigenvectors of A are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec: right eigenvectors of A are not computed;
      -     = MagmaVec:   right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       DOUBLE PRECISION array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    wr      DOUBLE PRECISION array, dimension (N)

    @param[out]
    wi      DOUBLE PRECISION array, dimension (N)
            WR and WI contain the real and imaginary parts,
            respectively, of the computed eigenvalues.  Complex
            conjugate pairs of eigenvalues appear consecutively
            with the eigenvalue having the positive imaginary part
            first.

    @param[out]
    VL      DOUBLE PRECISION array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      DOUBLE PRECISION array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= (2+nb)*N.
            For optimal performance, LWORK >= (2+2*nb)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements i+1:N of WR and WI contain eigenvalues which
                  have converged.

    @ingroup magma_dgeev_driver
    ********************************************************************/
extern "C" magma_int_t
magma_dgeev_m(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    double *A, magma_int_t lda,
    double *wr, double *wi,
    double *VL, magma_int_t ldvl,
    double *VR, magma_int_t ldvr,
    double *work, magma_int_t lwork,
    magma_int_t *info )
{
    /* Column-major element accessors; they yield POINTERS and use 0-based (i,j). */
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)

    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;

    double d__1, d__2;
    double r, cs, sn, scl;
    double dum[1], eps;
    double anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    /* select is passed to trevc without being initialized; the calls below use
     * howmany = "B" / MagmaBacktransVec -- presumably select is not referenced
     * in that mode (TODO confirm against the trevc documentation). */
    magma_int_t scalea, minwrk, optwrk, lquery, wantvl, wantvr, select[1];

    magma_side_t side = MagmaRight;

    magma_timer_t time_total=0, time_gehrd=0, time_unghr=0, time_hseqr=0, time_trevc=0, time_sum=0;
    magma_flops_t flop_total=0, flop_gehrd=0, flop_unghr=0, flop_hseqr=0, flop_trevc=0, flop_sum=0;
    timer_start( time_total );
    flops_start( flop_total );

    /* Check arguments. The negative codes are the 1-based positions of the
     * offending arguments in this function's parameter list. */
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -9;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -11;
    }

    /* Compute workspace */
    nb = magma_get_dgehrd_nb( n );
    if (*info == 0) {
        minwrk = (2 +   nb)*n;
        optwrk = (2 + 2*nb)*n;
        work[0] = MAGMA_D_MAKE( (double) optwrk, 0. );

        if (lwork < minwrk && ! lquery) {
            *info = -13;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        /* Workspace query: work[0] already holds optwrk. */
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }

    /* Storage for the nb-by-n T matrices produced by the Hessenberg reduction:
     * dT via magma_dmalloc (Versions 3-5), T via magma_dmalloc_cpu (Versions 4-5).
     * Both are released after the CLEANUP label. */
    #if defined(Version3) || defined(Version4) || defined(Version5)
    double *dT;
    if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version4) || defined(Version5)
    double *T;
    if (MAGMA_SUCCESS != magma_dmalloc_cpu( &T, nb*n )) {
        magma_free( dT );
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_dlamch( "P" );
    smlnum = lapackf77_dlamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_dlabad( &smlnum, &bignum );
    smlnum = magma_dsqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_dlascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (Workspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (Workspace: need 3*N, prefer 2*N + N*NB)
     *  - including N reserved for gebal/gebak, unused by dgehrd */
    /* work layout here: [0,n) balancing scales, [n,2n) tau, [2n,lwork) scratch */
    itau = ibal + n;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    timer_start( time_gehrd );
    flops_start( flop_gehrd );
    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_dgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_dgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version4) || defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_dgehrd_m( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, T, &ierr );
        // copy the host T matrices into dT (used by magma_dorghr in Version 4)
        magma_dsetmatrix( nb, n, T, nb, dT, nb );
    #endif
    time_sum += timer_stop( time_gehrd );
    flop_sum += flops_stop( flop_gehrd );

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_dlacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate orthogonal matrix in VL
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         *  - including N reserved for gebal/gebak, unused by dorghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_dorghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_dorghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_dorghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        /* Perform QR iteration, accumulating Schur vectors in VL
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by dhseqr */
        /* tau is no longer needed, so scratch space now starts at itau */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VL, &ldvl, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_dlacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_dlacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate orthogonal matrix in VR
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         *  - including N reserved for gebal/gebak, unused by dorghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_dorghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_dorghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_dorghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by dhseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }
    else {
        /* Compute eigenvalues only
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by dhseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }

    /* If INFO > 0 from DHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    timer_start( time_trevc );
    flops_start( flop_trevc );
    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (Workspace: need 4*N, prefer (2 + 2*nb)*N)
         *  - including N reserved for gebal/gebak, unused by dtrevc */
        liwrk = lwork - iwrk;
        /* TREVC_VERSION selects the back-transforming eigenvector solver at
         * compile time; any value outside 1..5 is a build error. */
        #if TREVC_VERSION == 1
        lapackf77_dtrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                          VR, &ldvr, &n, &nout, &work[iwrk], &ierr );
        #elif TREVC_VERSION == 2
        lapackf77_dtrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                           VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr );
        #elif TREVC_VERSION == 3
        magma_dtrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                       VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #elif TREVC_VERSION == 4
        magma_dtrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                          VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #elif TREVC_VERSION == 5
        magma_dtrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                              VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }
    time_sum += timer_stop( time_trevc );
    flop_sum += flops_stop( flop_trevc );

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (Workspace: need N) */
        lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
                          VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( wi[i] == 0. ) {
                /* real eigenvalue: single column, scale to unit 2-norm */
                scl = 1. / magma_cblas_dnrm2( n, VL(0,i), 1 );
                blasf77_dscal( &n, &scl, VL(0,i), &ione );
            }
            else if ( wi[i] > 0. ) {
                /* first of a conjugate pair: columns i and i+1 are scaled by the
                 * combined norm of both columns. The wi[i] < 0 partner column
                 * falls through both branches and is not processed again. */
                d__1 = magma_cblas_dnrm2( n, VL(0,i),   1 );
                d__2 = magma_cblas_dnrm2( n, VL(0,i+1), 1 );
                scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
                blasf77_dscal( &n, &scl, VL(0,i),   &ione );
                blasf77_dscal( &n, &scl, VL(0,i+1), &ione );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *VL(k,i);
                    d__2 = *VL(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = blasf77_idamax( &n, &work[iwrk], &ione ) - 1;  // subtract 1; k is 0-based
                /* rotate the column pair so the largest-modulus entry (row k)
                 * ends up with a zero in column i+1 */
                lapackf77_dlartg( VL(k,i), VL(k,i+1), &cs, &sn, &r );
                blasf77_drot( &n, VL(0,i), &ione, VL(0,i+1), &ione, &cs, &sn );
                *VL(k,i+1) = 0.;
            }
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (Workspace: need N) */
        lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
                          VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        /* (same scheme as for VL above) */
        for (i = 0; i < n; ++i) {
            if ( wi[i] == 0. ) {
                scl = 1. / magma_cblas_dnrm2( n, VR(0,i), 1 );
                blasf77_dscal( &n, &scl, VR(0,i), &ione );
            }
            else if ( wi[i] > 0. ) {
                d__1 = magma_cblas_dnrm2( n, VR(0,i),   1 );
                d__2 = magma_cblas_dnrm2( n, VR(0,i+1), 1 );
                scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
                blasf77_dscal( &n, &scl, VR(0,i),   &ione );
                blasf77_dscal( &n, &scl, VR(0,i+1), &ione );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *VR(k,i);
                    d__2 = *VR(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = blasf77_idamax( &n, &work[iwrk], &ione ) - 1;  // subtract 1; k is 0-based
                lapackf77_dlartg( VR(k,i), VR(k,i+1), &cs, &sn, &r );
                blasf77_drot( &n, VR(0,i), &ione, VR(0,i+1), &ione, &cs, &sn );
                *VR(k,i+1) = 0.;
            }
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in wr[i+1:n] and wi[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld = max( nval, 1 );
        lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr + (*info), &ld, &ierr );
        lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr, &n, &ierr );
            lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi, &n, &ierr );
        }
    }

    #if defined(Version3) || defined(Version4) || defined(Version5)
    magma_free( dT );
    #endif
    #if defined(Version4) || defined(Version5)
    magma_free_cpu( T );
    #endif

    timer_stop( time_total );
    flops_stop( flop_total );
    timer_printf( "dgeev times n %5d, gehrd %7.3f, unghr %7.3f, hseqr %7.3f, trevc %7.3f, total %7.3f, sum %7.3f\n",
                  (int) n, time_gehrd, time_unghr, time_hseqr, time_trevc, time_total, time_sum );
    timer_printf( "dgeev flops n %5d, gehrd %7lld, unghr %7lld, hseqr %7lld, trevc %7lld, total %7lld, sum %7lld\n",
                  (int) n, flop_gehrd, flop_unghr, flop_hseqr, flop_trevc, flop_total, flop_sum );

    /* work[0] was used as balancing storage above; restore the optimal LWORK */
    work[0] = MAGMA_D_MAKE( (double) optwrk, 0. );

    return *info;
} /* magma_dgeev_m */
/**
    Purpose
    -------
    CGEEV computes for an N-by-N complex nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**H * A = lambda(j) * u(j)**H
    where u(j)**H denotes the conjugate transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec: left eigenvectors of A are not computed;
      -     = MagmaVec:   left eigenvectors of A are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec: right eigenvectors of A are not computed;
      -     = MagmaVec:   right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       COMPLEX array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    w       COMPLEX array, dimension (N)
            W contains the computed eigenvalues.

    @param[out]
    VL      COMPLEX array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      COMPLEX array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) COMPLEX array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= (1+nb)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param
    rwork   (workspace) REAL array, dimension (2*N)

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements i+1:N of W contain eigenvalues which have
                  converged.

    @ingroup magma_cgeev_driver
    ********************************************************************/
extern "C" magma_int_t
magma_cgeev_m(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    magmaFloatComplex *A, magma_int_t lda,
    magmaFloatComplex *w,
    magmaFloatComplex *VL, magma_int_t ldvl,
    magmaFloatComplex *VR, magma_int_t ldvr,
    magmaFloatComplex *work, magma_int_t lwork,
    float *rwork,
    magma_int_t *info )
{
    /* Column-major element accessors; they yield POINTERS and use 0-based (i,j). */
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)

    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;

    float d__1, d__2;
    magmaFloatComplex tmp;
    float scl;
    float dum[1], eps;
    float anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    /* select is passed to trevc without being initialized; the calls below use
     * howmany = "B" / MagmaBacktransVec -- presumably select is not referenced
     * in that mode (TODO confirm against the trevc documentation). */
    magma_int_t scalea, minwrk, irwork, lquery, wantvl, wantvr, select[1];

    magma_side_t side = MagmaRight;

    irwork = 0;
    /* Check arguments. The negative codes are the 1-based positions of the
     * offending arguments in this function's parameter list. */
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -8;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -10;
    }

    /* Compute workspace */
    nb = magma_get_cgehrd_nb( n );
    if (*info == 0) {
        minwrk = (1+nb)*n;
        work[0] = MAGMA_C_MAKE( minwrk, 0 );

        if (lwork < minwrk && ! lquery) {
            *info = -12;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        /* Workspace query: work[0] already holds minwrk. */
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }

    /* Storage for the nb-by-n T matrices produced by the Hessenberg reduction:
     * dT via magma_cmalloc (Versions 3-5), T via magma_cmalloc_cpu (Versions 4-5).
     * Both are released after the CLEANUP label. */
    #if defined(Version3) || defined(Version4) || defined(Version5)
    magmaFloatComplex *dT;
    if (MAGMA_SUCCESS != magma_cmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version4) || defined(Version5)
    magmaFloatComplex *T;
    if (MAGMA_SUCCESS != magma_cmalloc_cpu( &T, nb*n )) {
        magma_free( dT );
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_slamch( "P" );
    smlnum = lapackf77_slamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_slabad( &smlnum, &bignum );
    smlnum = magma_ssqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_clange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_clascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (CWorkspace: none)
     * (RWorkspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_cgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (CWorkspace: need 2*N, prefer N + N*NB)
     * (RWorkspace: N)
     *  - including N reserved for gebal/gebak, unused by cgehrd */
    /* complex work layout here: [0,n) tau, [n,lwork) scratch;
     * the balancing scales live in rwork[0,n) instead */
    itau = 0;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_cgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_cgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_cgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version4) || defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_cgehrd_m( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, T, &ierr );
        // copy the host T matrices into dT (used by magma_cunghr in Version 4)
        magma_csetmatrix( nb, n, T, nb, dT, nb );
    #endif

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_clacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate unitary matrix in VL
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by cunghr */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_cunghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_cunghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_cunghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VL
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        /* tau is no longer needed, so scratch space now starts at itau (= 0) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VL, &ldvl, &work[iwrk], &liwrk, info );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_clacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_clacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate unitary matrix in VR
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by cunghr */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_cunghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_cunghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_cunghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
    }
    else {
        /* Compute eigenvalues only
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "E", "N", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
    }

    /* If INFO > 0 from CHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (CWorkspace: need 2*N)
         * (RWorkspace: need 2*N)
         *  - including N reserved for gebal/gebak, unused by ctrevc */
        /* real scratch starts after the N balancing scales kept in rwork */
        irwork = ibal + n;
        /* TREVC_VERSION selects the back-transforming eigenvector solver at
         * compile time; any value outside 1..5 is a build error. Versions 3-5
         * reuse the liwrk value computed before chseqr above. */
        #if TREVC_VERSION == 1
        lapackf77_ctrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                          VR, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 2
        liwrk = lwork - iwrk;
        lapackf77_ctrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                           VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 3
        magma_ctrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                       VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 4
        magma_ctrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                          VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 5
        magma_ctrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                              VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_cgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
                          VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            /* scale column i to unit 2-norm */
            scl = 1. / magma_cblas_scnrm2( n, VL(0,i), 1 );
            blasf77_csscal( &n, &scl, VL(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_C_REAL( *VL(k,i) );
                d__2 = MAGMA_C_IMAG( *VL(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_isamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            /* multiply the column by conj(v_k)/|v_k| so that entry k becomes
             * real, then clear the imaginary part of entry k explicitly */
            tmp = MAGMA_C_CNJG( *VL(k,i) ) / magma_ssqrt( rwork[irwork + k] );
            blasf77_cscal( &n, &tmp, VL(0,i), &ione );
            *VL(k,i) = MAGMA_C_MAKE( MAGMA_C_REAL( *VL(k,i) ), 0 );
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_cgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
                          VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        /* (same scheme as for VL above) */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_scnrm2( n, VR(0,i), 1 );
            blasf77_csscal( &n, &scl, VR(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_C_REAL( *VR(k,i) );
                d__2 = MAGMA_C_IMAG( *VR(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_isamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_C_CNJG( *VR(k,i) ) / magma_ssqrt( rwork[irwork + k] );
            blasf77_cscal( &n, &tmp, VR(0,i), &ione );
            *VR(k,i) = MAGMA_C_MAKE( MAGMA_C_REAL( *VR(k,i) ), 0 );
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in w[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld = max( nval, 1 );
        lapackf77_clascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_clascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w, &n, &ierr );
        }
    }

    #if defined(Version3) || defined(Version4) || defined(Version5)
    magma_free( dT );
    #endif
    #if defined(Version4) || defined(Version5)
    magma_free_cpu( T );
    #endif

    /* work[0] was used as scratch above; restore the reported LWORK */
    work[0] = MAGMA_C_MAKE( (float) minwrk, 0. );  // TODO use optwrk as in dgeev

    return *info;
} /* magma_cgeev_m */
/* Subroutine */ int dtrsna_(char *job, char *howmny, logical *select,
	integer *n, doublereal *t, integer *ldt, doublereal *vl, integer *ldvl,
	doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep,
	integer *mm, integer *m, doublereal *work, integer *ldwork,
	integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    DTRSNA estimates reciprocal condition numbers for specified
    eigenvalues and/or right eigenvectors of a real upper
    quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q
    orthogonal).

    T must be in Schur canonical form (as returned by DHSEQR), that is,
    block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
    2-by-2 diagonal block has its diagonal elements equal and its
    off-diagonal elements of opposite sign.

    Arguments
    =========

    JOB     (input) CHARACTER*1
            Specifies whether condition numbers are required for
            eigenvalues (S) or eigenvectors (SEP):
            = 'E': for eigenvalues only (S);
            = 'V': for eigenvectors only (SEP);
            = 'B': for both eigenvalues and eigenvectors (S and SEP).

    HOWMNY  (input) CHARACTER*1
            = 'A': compute condition numbers for all eigenpairs;
            = 'S': compute condition numbers for selected eigenpairs
                   specified by the array SELECT.

    SELECT  (input) LOGICAL array, dimension (N)
            If HOWMNY = 'S', SELECT specifies the eigenpairs for which
            condition numbers are required. To select condition numbers
            for the eigenpair corresponding to a real eigenvalue w(j),
            SELECT(j) must be set to .TRUE.. To select condition numbers
            corresponding to a complex conjugate pair of eigenvalues w(j)
            and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be
            set to .TRUE..
            If HOWMNY = 'A', SELECT is not referenced.

    N       (input) INTEGER
            The order of the matrix T. N >= 0.

    T       (input) DOUBLE PRECISION array, dimension (LDT,N)
            The upper quasi-triangular matrix T, in Schur canonical form.

    LDT     (input) INTEGER
            The leading dimension of the array T. LDT >= max(1,N).

    VL      (input) DOUBLE PRECISION array, dimension (LDVL,M)
            If JOB = 'E' or 'B', VL must contain left eigenvectors of T
            (or of any Q*T*Q**T with Q orthogonal), corresponding to the
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors
            must be stored in consecutive columns of VL, as returned by
            DHSEIN or DTREVC.
            If JOB = 'V', VL is not referenced.

    LDVL    (input) INTEGER
            The leading dimension of the array VL.
            LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.

    VR      (input) DOUBLE PRECISION array, dimension (LDVR,M)
            If JOB = 'E' or 'B', VR must contain right eigenvectors of T
            (or of any Q*T*Q**T with Q orthogonal), corresponding to the
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors
            must be stored in consecutive columns of VR, as returned by
            DHSEIN or DTREVC.
            If JOB = 'V', VR is not referenced.

    LDVR    (input) INTEGER
            The leading dimension of the array VR.
            LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.

    S       (output) DOUBLE PRECISION array, dimension (MM)
            If JOB = 'E' or 'B', the reciprocal condition numbers of the
            selected eigenvalues, stored in consecutive elements of the
            array. For a complex conjugate pair of eigenvalues two
            consecutive elements of S are set to the same value. Thus
            S(j), SEP(j), and the j-th columns of VL and VR all
            correspond to the same eigenpair (but not in general the
            j-th eigenpair, unless all eigenpairs are selected).
            If JOB = 'V', S is not referenced.

    SEP     (output) DOUBLE PRECISION array, dimension (MM)
            If JOB = 'V' or 'B', the estimated reciprocal condition
            numbers of the selected eigenvectors, stored in consecutive
            elements of the array. For a complex eigenvector two
            consecutive elements of SEP are set to the same value. If
            the eigenvalues cannot be reordered to compute SEP(j), SEP(j)
            is set to 0; this can only occur when the true value would be
            very small anyway.
            If JOB = 'E', SEP is not referenced.

    MM      (input) INTEGER
            The number of elements in the arrays S (if JOB = 'E' or 'B')
            and/or SEP (if JOB = 'V' or 'B'). MM >= M.

    M       (output) INTEGER
            The number of elements of the arrays S and/or SEP actually
            used to store the estimated condition numbers.
            If HOWMNY = 'A', M is set to N.

    WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,N+6)
            If JOB = 'E', WORK is not referenced.
            Columns 1..N hold the reordered copy of T, column N+1 holds
            DTREXC scratch / the imaginary parts passed to DLAQTR,
            columns N+2..N+5 are the DLACON vectors and column N+6 is
            DLAQTR scratch, so N+6 columns are accessed.

    LDWORK  (input) INTEGER
            The leading dimension of the array WORK.
            LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.

    IWORK   (workspace) INTEGER array, dimension (2*(N-1))
            If JOB = 'E', IWORK is not referenced.
            It is handed to DLACON together with a problem order of
            2*(N-1) when the eigenvalue is complex.

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value

    Further Details
    ===============

    The reciprocal of the condition number of an eigenvalue lambda is
    defined as

            S(lambda) = |v'*u| / (norm(u)*norm(v))

    where u and v are the right and left eigenvectors of T corresponding
    to lambda; v' denotes the conjugate-transpose of v, and norm(u)
    denotes the Euclidean norm. These reciprocal condition numbers always
    lie between zero (very badly conditioned) and one (very well
    conditioned). If n = 1, S(lambda) is defined to be 1.

    An approximate error bound for a computed eigenvalue W(i) is given by

                        EPS * norm(T) / S(i)

    where EPS is the machine precision.

    The reciprocal of the condition number of the right eigenvector u
    corresponding to lambda is defined as follows. Suppose

                T = ( lambda  c  )
                    (   0    T22 )

    Then the reciprocal condition number is

            SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )

    where sigma-min denotes the smallest singular value. We approximate
    the smallest singular value by the reciprocal of an estimate of the
    one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is
    defined to be abs(T(1,1)).

    An approximate error bound for a computed right eigenvector VR(i)
    is given by

                        EPS * norm(T) / SEP(i)

    =====================================================================

    Implementation notes: abs() and max() are expected to be the
    function-like macros supplied by the f2c support header (not the
    integer abs() of <stdlib.h>) -- TODO confirm against the file's
    includes.  All scratch variables are automatic, so the routine keeps
    no state between calls. */

    /* Table of constant values (passed by address, never modified here) */
    static integer c__1 = 1;
    static logical c_true = TRUE_;
    static logical c_false = FALSE_;

    /* Builtin functions */
    double sqrt(doublereal);

    /* External routines */
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
	    integer *);
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern doublereal dlapy2_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *,
	    integer *, doublereal *, integer *);
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *,
	    doublereal *, integer *, doublereal *, integer *), xerbla_(char *,
	    integer *);
    extern /* Subroutine */ int dlaqtr_(logical *, logical *, integer *,
	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
	    doublereal *, doublereal *, integer *), dtrexc_(char *, integer *,
	    doublereal *, integer *, doublereal *, integer *, integer *,
	    integer *, doublereal *, integer *);

    /* Local variables */
    integer i__1, i__2;
    doublereal d__1, d__2;
    integer kase, ierr, ifst, ilst, i, j, k, n2, nn, ks;
    logical pair, wants, wantsp, wantbh, somcon;
    logical *ltran;
    doublereal cond, prod, prod1, prod2, lnrm, rnrm;
    doublereal scale, delta, cs, sn, mu, est;
    doublereal eps, smlnum, bignum;
    /* Placeholders for arguments the callees do not reference. */
    doublereal dumm = 0.;
    doublereal dummy[1] = { 0. };

/* 1-based Fortran-style accessors.  Any earlier definition of the same
   names is dropped first so that a differing definition left behind by
   preceding code in this file is not an invalid macro redefinition. */
#undef DUMMY
#undef SELECT
#undef S
#undef SEP
#undef IWORK
#undef T
#undef VL
#undef VR
#undef WORK
#define DUMMY(I) dummy[(I)]
#define SELECT(I) select[(I)-1]
#define S(I) s[(I)-1]
#define SEP(I) sep[(I)-1]
#define IWORK(I) iwork[(I)-1]
#define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)]
#define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)]
#define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)]
#define WORK(I,J) work[(I)-1 + ((J)-1)* ( *ldwork)]

    /* Decode and test the input parameters */

    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantsp = lsame_(job, "V") || wantbh;

    somcon = lsame_(howmny, "S");

    *info = 0;
    if (! wants && ! wantsp) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || (wants && *ldvl < *n)) {
	*info = -8;
    } else if (*ldvr < 1 || (wants && *ldvr < *n)) {
	*info = -10;
    } else {

	/* Set M to the number of eigenpairs for which condition numbers
	   are required, and test MM. */

	if (somcon) {
	    *m = 0;
	    pair = FALSE_;
	    for (k = 1; k <= *n; ++k) {
		if (pair) {
		    /* Second row of a 2-by-2 block: already counted. */
		    pair = FALSE_;
		} else if (k < *n) {
		    if (T(k+1,k) == 0.) {
			if (SELECT(k)) {
			    ++(*m);
			}
		    } else {
			pair = TRUE_;
			if (SELECT(k) || SELECT(k + 1)) {
			    *m += 2;
			}
		    }
		} else {
		    if (SELECT(*n)) {
			++(*m);
		    }
		}
	    }
	} else {
	    *m = *n;
	}

	if (*mm < *m) {
	    *info = -13;
	} else if (*ldwork < 1 || (wantsp && *ldwork < *n)) {
	    *info = -16;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DTRSNA", &i__1);
	return 0;
    }

    /* Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (*n == 1) {
	if (somcon) {
	    if (! SELECT(1)) {
		return 0;
	    }
	}
	if (wants) {
	    S(1) = 1.;
	}
	if (wantsp) {
	    d__1 = T(1,1);
	    SEP(1) = abs(d__1);
	}
	return 0;
    }

    /* Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

    /* ks counts the output slots (columns of VL/VR, entries of S/SEP)
       consumed so far; k walks the diagonal of T. */
    ks = 0;
    pair = FALSE_;
    for (k = 1; k <= *n; ++k) {

	/* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. */

	if (pair) {
	    /* k is the second row of the block handled on the previous
	       iteration. */
	    pair = FALSE_;
	    continue;
	}
	if (k < *n) {
	    pair = T(k+1,k) != 0.;
	}

	/* Determine whether condition numbers are required for the k-th
	   eigenpair. */

	if (somcon) {
	    if (pair) {
		if (! SELECT(k) && ! SELECT(k + 1)) {
		    continue;
		}
	    } else {
		if (! SELECT(k)) {
		    continue;
		}
	    }
	}

	++ks;

	if (wants) {

	    /* Compute the reciprocal condition number of the k-th
	       eigenvalue. */

	    if (! pair) {

		/* Real eigenvalue. */

		prod = ddot_(n, &VR(1,ks), &c__1, &VL(1,ks), &c__1);
		rnrm = dnrm2_(n, &VR(1,ks), &c__1);
		lnrm = dnrm2_(n, &VL(1,ks), &c__1);
		S(ks) = abs(prod) / (rnrm * lnrm);
	    } else {

		/* Complex eigenvalue: columns ks and ks+1 hold the real
		   and imaginary parts of the eigenvectors. */

		prod1 = ddot_(n, &VR(1,ks), &c__1, &VL(1,ks), &c__1);
		prod1 += ddot_(n, &VR(1,ks+1), &c__1, &VL(1,ks+1), &c__1);
		prod2 = ddot_(n, &VL(1,ks), &c__1, &VR(1,ks+1), &c__1);
		prod2 -= ddot_(n, &VL(1,ks+1), &c__1, &VR(1,ks), &c__1);
		d__1 = dnrm2_(n, &VR(1,ks), &c__1);
		d__2 = dnrm2_(n, &VR(1,ks+1), &c__1);
		rnrm = dlapy2_(&d__1, &d__2);
		d__1 = dnrm2_(n, &VL(1,ks), &c__1);
		d__2 = dnrm2_(n, &VL(1,ks+1), &c__1);
		lnrm = dlapy2_(&d__1, &d__2);
		cond = dlapy2_(&prod1, &prod2) / (rnrm * lnrm);
		S(ks) = cond;
		S(ks + 1) = cond;
	    }
	}

	if (wantsp) {

	    /* Estimate the reciprocal condition number of the k-th
	       eigenvector.

	       Copy the matrix T to the array WORK and swap the diagonal
	       block beginning at T(k,k) to the (1,1) position. */

	    dlacpy_("Full", n, n, &T(1,1), ldt, &WORK(1,1), ldwork);
	    ifst = k;
	    ilst = 1;
	    dtrexc_("No Q", n, &WORK(1,1), ldwork, dummy, &c__1, &ifst,
		    &ilst, &WORK(1,*n+1), &ierr);

	    if (ierr == 1 || ierr == 2) {

		/* Could not swap because blocks not well separated */

		scale = 1.;
		est = bignum;
	    } else {

		/* Reordering successful */

		if (WORK(2,1) == 0.) {

		    /* Form C = T22 - lambda*I in WORK(2:N,2:N). */

		    for (i = 2; i <= *n; ++i) {
			WORK(i,i) -= WORK(1,1);
		    }
		    n2 = 1;
		    nn = *n - 1;
		} else {

		    /* Triangularize the 2 by 2 block by unitary
		       transformation U = [  cs   i*ss ]
		                          [ i*ss   cs  ].
		       such that the (1,1) position of WORK is complex
		       eigenvalue lambda with positive imaginary part. (2,2)
		       position of WORK is the complex eigenvalue lambda
		       with negative imaginary part. */

		    d__1 = WORK(1,2);
		    d__2 = WORK(2,1);
		    mu = sqrt(abs(d__1)) * sqrt(abs(d__2));
		    delta = dlapy2_(&mu, &WORK(2,1));
		    cs = mu / delta;
		    sn = -WORK(2,1) / delta;

		    /* Form

		       C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ]
		                              [   mu                     ]
		                              [         ..               ]
		                              [             ..           ]
		                              [                  mu      ]
		       where C' is conjugate transpose of complex matrix C,
		       and RWORK is stored starting in the N+1-st column of
		       WORK. */

		    for (j = 3; j <= *n; ++j) {
			WORK(2,j) = cs * WORK(2,j);
			WORK(j,j) -= WORK(1,1);
		    }
		    WORK(2,2) = 0.;

		    WORK(1,*n+1) = mu * 2.;
		    for (i = 2; i <= *n - 1; ++i) {
			WORK(i,*n+1) = sn * WORK(1,i+1);
		    }
		    n2 = 2;
		    /* Real and imaginary parts are stacked for DLACON. */
		    nn = (*n - 1) * 2;
		}

		/* Estimate norm(inv(C')).  DLACON uses reverse
		   communication: it returns kase != 0 while it still needs
		   a solve with C' (kase == 1) or C (otherwise), and
		   kase == 0 once est is final. */

		est = 0.;
		kase = 0;
		i__2 = *n - 1;
		for (;;) {
		    dlacon_(&nn, &WORK(1,*n+2), &WORK(1,*n+4), &IWORK(1),
			    &est, &kase);
		    if (kase == 0) {
			break;
		    }
		    ltran = (kase == 1) ? &c_true : &c_false;
		    if (n2 == 1) {

			/* Real eigenvalue: solve C'*x = scale*c (kase == 1)
			   or C*x = scale*c. */

			dlaqtr_(ltran, &c_true, &i__2, &WORK(2,2), ldwork,
				dummy, &dumm, &scale, &WORK(1,*n+4),
				&WORK(1,*n+6), &ierr);
		    } else {

			/* Complex eigenvalue: solve
			   C'*(p+iq) = scale*(c+id) (kase == 1) or
			   C*(p+iq) = scale*(c+id) in real arithmetic. */

			dlaqtr_(ltran, &c_false, &i__2, &WORK(2,2), ldwork,
				&WORK(1,*n+1), &mu, &scale, &WORK(1,*n+4),
				&WORK(1,*n+6), &ierr);
		    }
		}
	    }

	    SEP(ks) = scale / max(est,smlnum);
	    if (pair) {
		SEP(ks + 1) = SEP(ks);
	    }
	}

	/* A complex pair occupies two output slots. */
	if (pair) {
	    ++ks;
	}
    }
    return 0;

/*     End of DTRSNA */

} /* dtrsna_ */
/* Subroutine */ int zgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublecomplex *work, integer * lwork, doublereal *rwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZGEEVX computes for an N-by-N complex nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. Optionally also, it computes a balancing transformation to improve the conditioning of the eigenvalues and eigenvectors (ILO, IHI, SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues (RCONDE), and reciprocal condition numbers for the right eigenvectors (RCONDV). The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Balancing a matrix means permuting the rows and columns to make it more nearly upper triangular, and applying a diagonal similarity transformation D * A * D**(-1), where D is a diagonal matrix, to make its rows and columns closer in norm and the condition numbers of its eigenvalues and eigenvectors smaller. The computed reciprocal condition numbers correspond to the balanced matrix. Permuting rows and columns will not change the condition numbers (in exact arithmetic) but diagonal scaling will. For further explanation of balancing, see section 4.10.2 of the LAPACK Users' Guide. 
Arguments ========= BALANC (input) CHARACTER*1 Indicates how the input matrix should be diagonally scaled and/or permuted to improve the conditioning of its eigenvalues. = 'N': Do not diagonally scale or permute; = 'P': Perform permutations to make the matrix more nearly upper triangular. Do not diagonally scale; = 'S': Diagonally scale the matrix, ie. replace A by D*A*D**(-1), where D is a diagonal matrix chosen to make the rows and columns of A more equal in norm. Do not permute; = 'B': Both diagonally scale and permute A. Computed reciprocal condition numbers will be for the matrix after balancing and/or permuting. Permuting does not change condition numbers (in exact arithmetic), but balancing does. JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of A are computed. If SENSE = 'E' or 'B', JOBVL must = 'V'. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. If SENSE = 'E' or 'B', JOBVR must = 'V'. SENSE (input) CHARACTER*1 Determines which reciprocal condition numbers are computed. = 'N': None are computed; = 'E': Computed for eigenvalues only; = 'V': Computed for right eigenvectors only; = 'B': Computed for eigenvalues and right eigenvectors. If SENSE = 'E' or 'B', both left and right eigenvectors must also be computed (JOBVL = 'V' and JOBVR = 'V'). N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. If JOBVL = 'V' or JOBVR = 'V', A contains the Schur form of the balanced version of the matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). W (output) COMPLEX*16 array, dimension (N) W contains the computed eigenvalues. VL (output) COMPLEX*16 array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. 
If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX*16 array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. ILO,IHI (output) INTEGER ILO and IHI are integer values determined when A was balanced. The balanced A(i,j) = 0 if I > J and J = 1,...,ILO-1 or I = IHI+1,...,N. SCALE (output) DOUBLE PRECISION array, dimension (N) Details of the permutations and scaling factors applied when balancing A. If P(j) is the index of the row and column interchanged with row and column j, and D(j) is the scaling factor applied to row and column j, then SCALE(J) = P(J), for J = 1,...,ILO-1 = D(J), for J = ILO,...,IHI = P(J) for J = IHI+1,...,N. The order in which the interchanges are made is N to IHI+1, then 1 to ILO-1. ABNRM (output) DOUBLE PRECISION The one-norm of the balanced matrix (the maximum of the sum of absolute values of elements of any column). RCONDE (output) DOUBLE PRECISION array, dimension (N) RCONDE(j) is the reciprocal condition number of the j-th eigenvalue. RCONDV (output) DOUBLE PRECISION array, dimension (N) RCONDV(j) is the reciprocal condition number of the j-th right eigenvector. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If SENSE = 'N' or 'E', LWORK >= max(1,2*N), and if SENSE = 'V' or 'B', LWORK >= N*N+2*N. For good performance, LWORK must generally be larger. 
RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors or condition numbers have been computed; elements 1:ILO-1 and i+1:N of W contain eigenvalues which have converged. ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c__8 = 8; static integer c_n1 = -1; static integer c__4 = 4; /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2; doublecomplex z__1, z__2; /* Builtin functions */ double sqrt(doublereal), d_imag(doublecomplex *); void d_cnjg(doublecomplex *, doublecomplex *); /* Local variables */ static char side[1]; static integer maxb; static doublereal anrm; static integer ierr, itau, iwrk, nout, i, k, icond; extern logical lsame_(char *, char *); extern /* Subroutine */ int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *); static logical scalea; extern doublereal dlamch_(char *); static doublereal cscale; extern /* Subroutine */ int dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), zgebak_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublecomplex *, integer *, integer *), zgebal_(char *, integer *, doublecomplex *, integer *, integer *, integer *, doublereal *, integer *); extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, 
ftnlen, ftnlen); static logical select[1]; extern /* Subroutine */ int zdscal_(integer *, doublereal *, doublecomplex *, integer *); static doublereal bignum; extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, integer *, doublereal *); extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *), zlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublecomplex *, integer *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static integer minwrk, maxwrk; static logical wantvl, wntsnb; static integer hswork; static logical wntsne; static doublereal smlnum; extern /* Subroutine */ int zhseqr_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); static logical wantvr; extern /* Subroutine */ int ztrevc_(char *, char *, logical *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, doublecomplex *, doublereal *, integer *), ztrsna_(char *, char *, logical *, integer *, doublecomplex *, integer *, doublecomplex * , integer *, doublecomplex *, integer *, doublereal *, doublereal *, integer *, integer *, doublecomplex *, integer *, doublereal *, integer *), zunghr_(integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *); static logical wntsnn, wntsnv; static char job[1]; static doublereal scl, dum[1], eps; static doublecomplex tmp; #define DUM(I) dum[(I)] #define W(I) w[(I)-1] #define SCALE(I) scale[(I)-1] #define RCONDE(I) rconde[(I)-1] #define RCONDV(I) rcondv[(I)-1] #define WORK(I) work[(I)-1] #define RWORK(I) rwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)] #define VR(I,J) vr[(I)-1 
+ ((J)-1)* ( *ldvr)] *info = 0; wantvl = lsame_(jobvl, "V"); wantvr = lsame_(jobvr, "V"); wntsnn = lsame_(sense, "N"); wntsne = lsame_(sense, "E"); wntsnv = lsame_(sense, "V"); wntsnb = lsame_(sense, "B"); if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") || lsame_(balanc, "B"))) { *info = -1; } else if (! wantvl && ! lsame_(jobvl, "N")) { *info = -2; } else if (! wantvr && ! lsame_(jobvr, "N")) { *info = -3; } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) && ! (wantvl && wantvr)) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || wantvl && *ldvl < *n) { *info = -10; } else if (*ldvr < 1 || wantvr && *ldvr < *n) { *info = -12; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. CWorkspace refers to complex workspace, and RWorkspace to real workspace. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV. HSWORK refers to the workspace preferred by ZHSEQR, as calculated below. HSWORK is computed assuming ILO=1 and IHI=N, the worst case.) */ minwrk = 1; if (*info == 0 && *lwork >= 1) { maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0, 6L, 1L); if (! wantvl && ! wantvr) { /* Computing MAX */ i__1 = 1, i__2 = *n << 1; minwrk = max(i__1,i__2); if (! 
(wntsnn || wntsne)) { /* Computing MAX */ i__1 = minwrk, i__2 = *n * *n + (*n << 1); minwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = ilaenv_(&c__8, "ZHSEQR", "SN", n, &c__1, n, &c_n1, 6L, 2L); maxb = max(i__1,2); if (wntsnn) { /* Computing MIN Computing MAX */ i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, & c_n1, 6L, 2L); i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); } else { /* Computing MIN Computing MAX */ i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "SN", n, &c__1, n, & c_n1, 6L, 2L); i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); } /* Computing MAX */ i__1 = k * (k + 2), i__2 = *n << 1; hswork = max(i__1,i__2); /* Computing MAX */ i__1 = max(maxwrk,1); maxwrk = max(i__1,hswork); if (! (wntsnn || wntsne)) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *n + (*n << 1); maxwrk = max(i__1,i__2); } } else { /* Computing MAX */ i__1 = 1, i__2 = *n << 1; minwrk = max(i__1,i__2); if (! (wntsnn || wntsne)) { /* Computing MAX */ i__1 = minwrk, i__2 = *n * *n + (*n << 1); minwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = ilaenv_(&c__8, "ZHSEQR", "SN", n, &c__1, n, &c_n1, 6L, 2L); maxb = max(i__1,2); /* Computing MIN Computing MAX */ i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, & c_n1, 6L, 2L); i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); /* Computing MAX */ i__1 = k * (k + 2), i__2 = *n << 1; hswork = max(i__1,i__2); /* Computing MAX */ i__1 = max(maxwrk,1); maxwrk = max(i__1,hswork); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", " ", n, &c__1, n, &c_n1, 6L, 1L); maxwrk = max(i__1,i__2); if (! 
(wntsnn || wntsne)) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *n + (*n << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *n << 1, i__1 = max(i__1,i__2); maxwrk = max(i__1,1); } WORK(1).r = (doublereal) maxwrk, WORK(1).i = 0.; } if (*lwork < minwrk) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("ZGEEVX", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = dlamch_("P"); smlnum = dlamch_("S"); bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ icond = 0; anrm = zlange_("M", n, n, &A(1,1), lda, dum); scalea = FALSE_; if (anrm > 0. && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { scalea = TRUE_; cscale = bignum; } if (scalea) { zlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &A(1,1), lda, & ierr); } /* Balance the matrix and compute ABNRM */ zgebal_(balanc, n, &A(1,1), lda, ilo, ihi, &SCALE(1), &ierr); *abnrm = zlange_("1", n, n, &A(1,1), lda, dum); if (scalea) { DUM(0) = *abnrm; dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, & ierr); *abnrm = DUM(0); } /* Reduce to upper Hessenberg form (CWorkspace: need 2*N, prefer N+N*NB) (RWorkspace: none) */ itau = 1; iwrk = itau + *n; i__1 = *lwork - iwrk + 1; zgehrd_(n, ilo, ihi, &A(1,1), lda, &WORK(itau), &WORK(iwrk), &i__1, & ierr); if (wantvl) { /* Want left eigenvectors Copy Householder vectors to VL */ *(unsigned char *)side = 'L'; zlacpy_("L", n, n, &A(1,1), lda, &VL(1,1), ldvl); /* Generate unitary matrix in VL (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) (RWorkspace: none) */ i__1 = *lwork - iwrk + 1; zunghr_(n, ilo, ihi, &VL(1,1), ldvl, &WORK(itau), &WORK(iwrk), & i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VL (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = *lwork - iwrk + 1; 
zhseqr_("S", "V", n, ilo, ihi, &A(1,1), lda, &W(1), &VL(1,1), ldvl, &WORK(iwrk), &i__1, info); if (wantvr) { /* Want left and right eigenvectors Copy Schur vectors to VR */ *(unsigned char *)side = 'B'; zlacpy_("F", n, n, &VL(1,1), ldvl, &VR(1,1), ldvr) ; } } else if (wantvr) { /* Want right eigenvectors Copy Householder vectors to VR */ *(unsigned char *)side = 'R'; zlacpy_("L", n, n, &A(1,1), lda, &VR(1,1), ldvr); /* Generate unitary matrix in VR (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) (RWorkspace: none) */ i__1 = *lwork - iwrk + 1; zunghr_(n, ilo, ihi, &VR(1,1), ldvr, &WORK(itau), &WORK(iwrk), & i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VR (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = *lwork - iwrk + 1; zhseqr_("S", "V", n, ilo, ihi, &A(1,1), lda, &W(1), &VR(1,1), ldvr, &WORK(iwrk), &i__1, info); } else { /* Compute eigenvalues only If condition numbers desired, compute Schur form */ if (wntsnn) { *(unsigned char *)job = 'E'; } else { *(unsigned char *)job = 'S'; } /* (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = *lwork - iwrk + 1; zhseqr_(job, "N", n, ilo, ihi, &A(1,1), lda, &W(1), &VR(1,1), ldvr, &WORK(iwrk), &i__1, info); } /* If INFO > 0 from ZHSEQR, then quit */ if (*info > 0) { goto L50; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors (CWorkspace: need 2*N) (RWorkspace: need N) */ ztrevc_(side, "B", select, n, &A(1,1), lda, &VL(1,1), ldvl, &VR(1,1), ldvr, n, &nout, &WORK(iwrk), &RWORK(1), & ierr); } /* Compute condition numbers if desired (CWorkspace: need N*N+2*N unless SENSE = 'E') (RWorkspace: need 2*N unless SENSE = 'E') */ if (! 
wntsnn) { ztrsna_(sense, "A", select, n, &A(1,1), lda, &VL(1,1), ldvl, &VR(1,1), ldvr, &RCONDE(1), &RCONDV(1), n, &nout, &WORK(iwrk), n, &RWORK(1), &icond); } if (wantvl) { /* Undo balancing of left eigenvectors */ zgebak_(balanc, "L", n, ilo, ihi, &SCALE(1), n, &VL(1,1), ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ i__1 = *n; for (i = 1; i <= *n; ++i) { scl = 1. / dznrm2_(n, &VL(1,i), &c__1); zdscal_(n, &scl, &VL(1,i), &c__1); i__2 = *n; for (k = 1; k <= *n; ++k) { i__3 = k + i * vl_dim1; /* Computing 2nd power */ d__1 = VL(k,i).r; /* Computing 2nd power */ d__2 = d_imag(&VL(k,i)); RWORK(k) = d__1 * d__1 + d__2 * d__2; /* L10: */ } k = idamax_(n, &RWORK(1), &c__1); d_cnjg(&z__2, &VL(k,i)); d__1 = sqrt(RWORK(k)); z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; tmp.r = z__1.r, tmp.i = z__1.i; zscal_(n, &tmp, &VL(1,i), &c__1); i__2 = k + i * vl_dim1; i__3 = k + i * vl_dim1; d__1 = VL(k,i).r; z__1.r = d__1, z__1.i = 0.; VL(k,i).r = z__1.r, VL(k,i).i = z__1.i; /* L20: */ } } if (wantvr) { /* Undo balancing of right eigenvectors */ zgebak_(balanc, "R", n, ilo, ihi, &SCALE(1), n, &VR(1,1), ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ i__1 = *n; for (i = 1; i <= *n; ++i) { scl = 1. 
/ dznrm2_(n, &VR(1,i), &c__1); zdscal_(n, &scl, &VR(1,i), &c__1); i__2 = *n; for (k = 1; k <= *n; ++k) { i__3 = k + i * vr_dim1; /* Computing 2nd power */ d__1 = VR(k,i).r; /* Computing 2nd power */ d__2 = d_imag(&VR(k,i)); RWORK(k) = d__1 * d__1 + d__2 * d__2; /* L30: */ } k = idamax_(n, &RWORK(1), &c__1); d_cnjg(&z__2, &VR(k,i)); d__1 = sqrt(RWORK(k)); z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; tmp.r = z__1.r, tmp.i = z__1.i; zscal_(n, &tmp, &VR(1,i), &c__1); i__2 = k + i * vr_dim1; i__3 = k + i * vr_dim1; d__1 = VR(k,i).r; z__1.r = d__1, z__1.i = 0.; VR(k,i).r = z__1.r, VR(k,i).i = z__1.i; /* L40: */ } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &W(*info + 1) , &i__2, &ierr); if (*info == 0) { if ((wntsnv || wntsnb) && icond == 0) { dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &RCONDV( 1), n, &ierr); } } else { i__1 = *ilo - 1; zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &W(1), n, &ierr); } } WORK(1).r = (doublereal) maxwrk, WORK(1).i = 0.; return 0; /* End of ZGEEVX */ } /* zgeevx_ */