void CNE6SSMSusy_high_scale_constraint<Two_scale>::update_scale() { assert(model && "CNE6SSMSusy_high_scale_constraint<Two_scale>::" "update_scale(): model pointer is zero."); const double currentScale = model->get_scale(); const CNE6SSMSusy_susy_parameters beta_functions(model->calc_beta()); const auto g1 = MODELPARAMETER(g1); const auto g2 = MODELPARAMETER(g2); const auto beta_g1 = BETAPARAMETER(g1); const auto beta_g2 = BETAPARAMETER(g2); scale = currentScale*exp((-g1 + g2)/(BETA(g1) - BETA(g2))); if (errno == ERANGE) { #ifdef ENABLE_VERBOSE ERROR("CNE6SSMSusy_high_scale_constraint<Two_scale>: Overflow error" " during calculation of high scale: " << strerror(errno) << '\n' << " current scale = " << currentScale << '\n' << " new scale = " << scale << '\n' << " resetting scale to " << get_initial_scale_guess()); #endif scale = get_initial_scale_guess(); errno = 0; } }
/* * See Figure 11-2, "The Hungarian method", page 251. * * Corresponds to lines 26--27, 38--39. * Called by hm_pre_search() and hm_search(). */ static void hm_update_slack( hm_data *hm, int z ) { int k, tmp; for EACH_U( k ) { tmp = C( z, k ) - ALPHA( z ) - BETA( k ); if ( 0 <= tmp && tmp < SLACK( k ) ) { SLACK( k ) = tmp; /* * The following decrement and increment are necessary to maintain * the count[] array, which is not included in the original Figure * 11-2 implementation, and whose addition and purpose are described * above in hm_construct_auxiliary_graph(). */ if ( NHBOR( k ) != blank ) { --COUNT( NHBOR( k ) ); } ++COUNT( z ); NHBOR( k ) = z; } } }
static float *twiddles_step_64(double two_pi, float *out, double theta) { int i; for (i=0; i<32; i++) { *out++ = ALPHA(theta); *out++ = BETA(theta); } return twiddles_step_32(two_pi, out, 2*theta); }
static float *twiddles_step_16(double two_pi, float *out, double theta) { int i; for (i=0; i<16; i++) { *out++ = ALPHA(theta*k[i]); *out++ = BETA(theta*k[i]); } return out; }
vector compgene(cmatrix& A,cmatrix& B) { int i; int N=A.Rows; int IFAIL=0; int MATV=0; int* ITER=new int[N]; vector ALFR(N); vector ALFI(N); vector BETA(N); double EPS1=0.; matrix AR(N,N); matrix AI(N,N); matrix BR(N,N); matrix BI(N,N); for (i=0;i<N;i++) for (int j=0;j<N;j++) { AR(i,j)=real(A(i,j)); AI(i,j)=imag(A(i,j)); BR(i,j)=real(B(i,j)); BI(i,j)=imag(B(i,j)); } matrix VR(N,N); matrix VI(N,N); /* f02gjf_(&N,AR.TheMatrix,&N,AI.TheMatrix,&N,BR.TheMatrix,&N, BI.TheMatrix,&N,&EPS1,ALFR.TheVector,ALFI.TheVector, BETA.TheVector,&MATV,VR.TheMatrix,&N, VI.TheMatrix,&N,ITER,&IFAIL); */ cerr<<"compgene to be implemented"<<endl; exit(1); if (IFAIL != 0) cerr <<"error in compgene "<<endl; if (!ALFR.TheVector) exit(1); for (i=0;i<N;i++) ALFR(i)=ALFR(i)/BETA(i); // sort ! char ORDER='A'; int M1=1; int M2=N; /* m01caf_(ALFR.TheVector,&M1,&M2,&ORDER,&IFAIL);*/ cerr<<"sort to be implemented"<<endl; exit(1); if (IFAIL != 0) cerr <<"error in m01caf_ "<<endl; return ALFR; }
/* * See Figure 11-2, "The Hungarian method", page 251. * * Corresponds to lines 7--8. */ static void hm_initialize( hm_data *hm ) { int i, j; for EACH_V( i ) { MATE( i ) = blank; ALPHA( i ) = 0; } for EACH_U( j ) { MATE( j ) = blank; BETA( j ) = INT_MAX; for EACH_V( i ) { if ( C( i, j ) < BETA( j ) ) { BETA( j ) = C( i, j ); } } } }
/* * See Figure 11-2, "The Hungarian method", page 251. * * Corresponds to lines 12--17. */ static void hm_construct_auxiliary_graph( hm_data *hm ) { int i, j; A.size = 0; for EACH_V( i ) { EXPOSED( i ) = blank; LABEL( i ) = blank; /* * The following data structure is not included in the Figure 11-2 * pseudo-code implementation. It has been added to account for * "labeling" on certain vertices described within Example 11.1 that * would otherwise be missing from the Figure 11-2 implementation. * * count[v] for any v \in V is equal to the size of the set * { u \in U : nhbor[u] = v }. When this set is non-empty, v is * considered to be "labeled". The use of this new data structure is * only to complete the conditional check on "labeled" statuses when * updating alpha within "procedure modify". */ COUNT( i ) = 0; } for EACH_U( j ) { SLACK( j ) = INT_MAX; /* * The following initialization of nhbor[] is necessary for proper usage * of the count[] array, whose addition and purpose is described above. */ NHBOR( j ) = blank; } for EACH_V( i ) { for EACH_U( j ) { if ( ALPHA( i ) + BETA( j ) == C( i, j ) ) { if ( MATE( j ) == blank ) { EXPOSED( i ) = j; } else if ( i != MATE( j ) ) { add_arc( &A, i, MATE( j ) ); } } } } }
/* * This function is for debugging purposes. It prints the algorithm's internal * state in a format similar to that of Example 11.1 (The matrix form of the * Hungarian method) beginning on page 252. * * The formatted output coded here is intended for small numbers. */ static void hm_print( hm_data *hm ) { int i, j, k; printf( "\n a\\b |" ); for EACH_U( j ) { printf( "%3d ", BETA( j ) ); } printf( "mate exposed label\n" ); printf( "-----+" ); for EACH_U( j ) { printf( "----" ); } printf( "------------------\n" ); for EACH_V( i ) { printf( " |\n %3d |", ALPHA( i ) ); for EACH_U( j ) { printf( "%3d ", C( i, j ) ); } printf( "%4d %7d %5d\n", MATE( i ), EXPOSED( i ), LABEL( i ) ); } printf( "\nslack" ); for EACH_U( j ) { printf( " %3d", SLACK( j ) == INT_MAX ? -1 : SLACK( j ) ); } printf( "\nnhbor" ); for EACH_U( j ) { printf( " %3d", NHBOR( j ) ); } printf( "\n\nA = { " ); for ( k = 0; k < A.size; ++k ) { printf( "(%d,%d) ", A.data[ k ].x, A.data[ k ].y ); } printf( "}\nQ = { " ); for ( k = 0; k < Q.size; ++k ) { printf( "%d ", Q.data[ k ] ); } printf( "}\n\n" ); }
real do_optimize_poisson(FILE *log, bool bVerbose, t_inputrec *ir, int natoms, rvec x[], rvec f[], real charge[], rvec box, real phi[], t_commrec *cr, t_nrnb *nrnb, rvec f_ref[], real phi_ref[], rvec beta, bool bOld) { #define BMIN 1.6 #define DB 0.025 #define NB (1+(int)((2.1-BMIN)/DB)) static bool bFirst = TRUE; static bool bSecond = TRUE; static t_PSgrid *pot,*rho; static int maxnit; static real r1,rc; real rmsf[NB][NB][NB],rmsf_min,rrmsf; ivec minimum; const real tol = 1e-2; int i,m,bx,by,bz; char buf[128]; real ener; ener = 0.0; if (bFirst) { maxnit = ir->userint1; fprintf(log,"Will use Poisson Solver for long-range electrostatics\n"); fprintf(log,"Grid size is %d x %d x %d\n",ir->nkx,ir->nky,ir->nkz); if ((ir->nkx < 4) || (ir->nky < 4) || (ir->nkz < 4)) fatal_error(0,"Grid must be at least 4 points in all directions"); pot = mk_PSgrid(ir->nkx,ir->nky,ir->nkz); rho = mk_PSgrid(ir->nkx,ir->nky,ir->nkz); r1 = ir->rcoulomb_switch; rc = ir->rcoulomb; for(m=0; (m<DIM); m++) beta[m] = 4.0/3.0; bFirst = FALSE; } /* Make the grid empty */ clear_PSgrid(rho); spread_q_poisson(log,bVerbose,TRUE,natoms,x,charge,box,rc,rho,nrnb, bOld,r1); symmetrize_PSgrid(log,rho,0.0); if (bSecond) copy_PSgrid(pot,rho); /* Second step: solving the poisson equation in real space */ (void) solve_poisson(log,pot,rho,bVerbose,nrnb,maxnit,tol,box); symmetrize_PSgrid(log,pot,0.0); /* Third and last step: gather the forces, energies and potential * from the grid. */ #define BETA(n) (BMIN+n*DB) /* Optimization of beta in progress */ for(bx=0; (bx<NB); bx++) { beta[XX] = BETA(bx); for(by=0; (by<NB); by++) { beta[YY] = BETA(by); for(bz=0; (bz<NB); bz++) { beta[ZZ] = BETA(bz); for(i=0; (i<natoms); i++) { phi[i] = 0.0; clear_rvec(f[i]); } ener = ps_gather_f(log,bVerbose,natoms,x,f,charge,box, phi,pot,beta,nrnb); sprintf(buf,"Poisson, beta = %g\n",beta[XX]); rmsf[bx][by][bz] = analyse_diff(log,buf,natoms,f_ref,f,phi_ref,phi,NULL, /*"fcorr.xvg","pcorr.xvg"*/NULL,NULL,NULL,NULL); } } } rmsf_min = rmsf[0][0][0]; minimum[XX] = minimum[YY] = minimum[ZZ] = 0; for(bx=0; (bx<NB); bx++) { beta[XX] = BETA(bx); for(by=0; (by<NB); by++) { beta[YY] = BETA(by); for(bz=0; (bz<NB); bz++) { beta[ZZ] = BETA(bz); rrmsf = rmsf[bx][by][bz]; fprintf(log,"Beta: %6.3f %6.3f %6.3f RMSF: %8.3f\n", beta[XX],beta[YY],beta[ZZ],rrmsf); if (rrmsf < rmsf_min) { rmsf_min = rrmsf; minimum[XX] = bx; minimum[YY] = by; minimum[ZZ] = bz; } } } } beta[XX] = BETA(minimum[XX]); beta[YY] = BETA(minimum[YY]); beta[ZZ] = BETA(minimum[ZZ]); fprintf(log,"Minimum RMSF %8.3f at Beta = %6.3f %6.3f %6.3f\n", rmsf_min,beta[XX],beta[YY],beta[ZZ]); /* Computing optimum once more... */ for(i=0; (i<natoms); i++) { phi[i] = 0.0; clear_rvec(f[i]); } ener = ps_gather_f(log,bVerbose,natoms,x,f,charge,box,phi,pot,beta,nrnb); return ener; }
/* * See Figure 11-2, "The Hungarian method", page 252. * * Corresponds to "procedure modify". */ static bool hm_modify( hm_data *hm ) { int i, j, theta_one; /* * Determine theta_one. */ theta_one = INT_MAX; for EACH_U( j ) { if ( 0 < SLACK( j ) && SLACK( j ) < theta_one ) { theta_one = SLACK( j ); } } theta_one /= 2; /* * Update the dual variable alpha. */ for EACH_V( i ) { /* * The following conditional expression has been changed from its form * in Figure 11-2. Here, an additional check on the count[] array is * performed to account for a certain type of "labeling" that is * mentioned in the Example 11.1 walk-through but is omitted from the * Figure 11-2 implementation. * * See the comments provided near the initialization of count[] in the * function hm_construct_auxiliary_graph(). */ if ( LABEL( i ) != blank || COUNT( i ) > 0 ) { ALPHA( i ) += theta_one; } else { ALPHA( i ) -= theta_one; } } /* * Update the dual variable beta. */ for EACH_U( j ) { if ( SLACK( j ) == 0 ) { BETA( j ) -= theta_one; } else { BETA( j ) += theta_one; } } /* * Update slack and check for new admissible edges. */ for EACH_U( j ) { if ( SLACK( j ) > 0 ) { SLACK( j ) -= 2 * theta_one; if ( SLACK( j ) == 0 ) { if ( MATE( j ) == blank ) { EXPOSED( NHBOR( j ) ) = j; hm_augment( hm, NHBOR( j ) ); return false; /* goto endstage */ } else { /* * The following statement corresponds to a pseudo-code * command that should be removed from the else-clause of * the modify procedure in Figure 11-2. * * LABEL( MATE( j ) ) = NHBOR( j ); * * The inclusion of the above statement causes the arc * added in one of the next statements to never be considered * in following "search" sub-stages during this stage, and it * partially duplicates what would happen in these sub-stages * if the arc were to be considered there. The result of * inclusion is (often) non-optimality of the algorithm's * output. */ /* * The next statement corresponds to a pseudo-code command * (in the same else-clause) that should be modified * slightly. In Figure 11-2, this command "pushes" mate[ u ] * into Q when it should be "pushing" nhbor[ u ] instead. * This is because the purpose of this command is to ensure * that the soon-to-be-added arc will be considered in the * next "search" sub-stage, and consideration is dependent * upon the arc-tail, not the arc-head. */ stack_push( &Q, NHBOR( j ) ); /* Note modification */ add_arc( &A, NHBOR( j ), MATE( j ) ); } } } } return true; }
void MAST::GCMMAOptimizationInterface::optimize() { #if MAST_ENABLE_GCMMA == 1 // make sure that all processes have the same problem setup _feval->sanitize_parallel(); int N = _feval->n_vars(), M = _feval->n_eq() + _feval->n_ineq(), n_rel_change_iters = _feval->n_iters_relative_change(); libmesh_assert_greater(N, 0); std::vector<Real> XVAL(N, 0.), XOLD1(N, 0.), XOLD2(N, 0.), XMMA(N, 0.), XMIN(N, 0.), XMAX(N, 0.), XLOW(N, 0.), XUPP(N, 0.), ALFA(N, 0.), BETA(N, 0.), DF0DX(N, 0.), A(M, 0.), B(M, 0.), C(M, 0.), Y(M, 0.), RAA(M, 0.), ULAM(M, 0.), FVAL(M, 0.), FAPP(M, 0.), FNEW(M, 0.), FMAX(M, 0.), DFDX(M*N, 0.), P(M*N, 0.), Q(M*N, 0.), P0(N, 0.), Q0(N, 0.), UU(M, 0.), GRADF(M, 0.), DSRCH(M, 0.), HESSF(M*(M+1)/2, 0.), f0_iters(n_rel_change_iters); std::vector<int> IYFREE(M, 0); std::vector<bool> eval_grads(M, false); Real ALBEFA = 0.1, GHINIT = 0.5, GHDECR = 0.7, GHINCR = 1.2, F0VAL = 0., F0NEW = 0., F0APP = 0., RAA0 = 0., Z = 0., GEPS =_feval->tolerance(); /*C********+*********+*********+*********+*********+*********+*********+ C C The meaning of some of the scalars and vectors in the program: C C N = Complex of variables x_j in the problem. C M = Complex of constraints in the problem (not including C the simple upper and lower bounds on the variables). C ALBEFA = Relative spacing between asymptote and mode limit. Lower value C will cause the move limit (alpha,beta) to move closer to asymptote C values (l, u). C GHINIT = Initial asymptote setting. For the first two iterations the C asymptotes (l, u) are defined based on offsets from the design C point as this fraction of the design variable bounds, ie. C l_j = x_j^k - GHINIT * (x_j^max - x_j^min) C u_j = x_j^k + GHINIT * (x_j^max - x_j^min) C GHDECR = Fraction by which the asymptote is reduced for oscillating C changes in design variables based on three consecutive iterations C GHINCR = Fraction by which the asymptote is increased for non-oscillating C changes in design variables based on three consecutive iterations C INNMAX = Maximal number of inner iterations within each outer iter. C A reasonable choice is INNMAX=10. C ITER = Current outer iteration number ( =1 the first iteration). C GEPS = Tolerance parameter for the constraints. C (Used in the termination criteria for the subproblem.) C C XVAL(j) = Current value of the variable x_j. C XOLD1(j) = Value of the variable x_j one iteration ago. C XOLD2(j) = Value of the variable x_j two iterations ago. C XMMA(j) = Optimal value of x_j in the MMA subproblem. C XMIN(j) = Original lower bound for the variable x_j. C XMAX(j) = Original upper bound for the variable x_j. C XLOW(j) = Value of the lower asymptot l_j. C XUPP(j) = Value of the upper asymptot u_j. C ALFA(j) = Lower bound for x_j in the MMA subproblem. C BETA(j) = Upper bound for x_j in the MMA subproblem. C F0VAL = Value of the objective function f_0(x) C FVAL(i) = Value of the i:th constraint function f_i(x). C DF0DX(j) = Derivative of f_0(x) with respect to x_j. C FMAX(i) = Right hand side of the i:th constraint. C DFDX(k) = Derivative of f_i(x) with respect to x_j, C where k = (j-1)*M + i. C P(k) = Coefficient p_ij in the MMA subproblem, where C k = (j-1)*M + i. C Q(k) = Coefficient q_ij in the MMA subproblem, where C k = (j-1)*M + i. C P0(j) = Coefficient p_0j in the MMA subproblem. C Q0(j) = Coefficient q_0j in the MMA subproblem. C B(i) = Right hand side b_i in the MMA subproblem. C F0APP = Value of the approximating objective function C at the optimal soultion of the MMA subproblem. C FAPP(i) = Value of the approximating i:th constraint function C at the optimal soultion of the MMA subproblem. C RAA0 = Parameter raa_0 in the MMA subproblem. C RAA(i) = Parameter raa_i in the MMA subproblem. C Y(i) = Value of the "artificial" variable y_i. C Z = Value of the "minimax" variable z. C A(i) = Coefficient a_i for the variable z. C C(i) = Coefficient c_i for the variable y_i. C ULAM(i) = Value of the dual variable lambda_i. C GRADF(i) = Gradient component of the dual objective function. C DSRCH(i) = Search direction component in the dual subproblem. C HESSF(k) = Hessian matrix component of the dual function. C IYFREE(i) = 0 for dual variables which are fixed to zero in C the current subspace of the dual subproblem, C = 1 for dual variables which are "free" in C the current subspace of the dual subproblem. C C********+*********+*********+*********+*********+*********+*********+*/ /* * The USER should now give values to the parameters * M, N, GEPS, XVAL (starting point), * XMIN, XMAX, FMAX, A and C. */ // _initi(M,N,GEPS,XVAL,XMIN,XMAX,FMAX,A,C); // Assumed: FMAX == A _feval->_init_dvar_wrapper(XVAL, XMIN, XMAX); // set the value of C[i] to be very large numbers Real max_x = 0.; for (unsigned int i=0; i<N; i++) if (max_x < fabs(XVAL[i])) max_x = fabs(XVAL[i]); std::fill(C.begin(), C.end(), std::max(1.e0*max_x, _constr_penalty)); int INNMAX=_max_inner_iters, ITER=0, ITE=0, INNER=0, ICONSE=0; /* * The outer iterative process starts. */ bool terminate = false, inner_terminate=false; while (!terminate) { ITER=ITER+1; ITE=ITE+1; /* * The USER should now calculate function values and gradients * at XVAL. The result should be put in F0VAL,DF0DX,FVAL,DFDX. */ std::fill(eval_grads.begin(), eval_grads.end(), true); _feval->_evaluate_wrapper(XVAL, F0VAL, true, DF0DX, FVAL, eval_grads, DFDX); if (ITER == 1) // output the very first iteration _feval->_output_wrapper(0, XVAL, F0VAL, FVAL, true); /* * RAA0,RAA,XLOW,XUPP,ALFA and BETA are calculated. */ raasta_(&M, &N, &RAA0, &RAA[0], &XMIN[0], &XMAX[0], &DF0DX[0], &DFDX[0]); asympg_(&ITER, &M, &N, &ALBEFA, &GHINIT, &GHDECR, &GHINCR, &XVAL[0], &XMIN[0], &XMAX[0], &XOLD1[0], &XOLD2[0], &XLOW[0], &XUPP[0], &ALFA[0], &BETA[0]); /* * The inner iterative process starts. */ // write the asymptote data for the inneriterations _output_iteration_data(ITER, XVAL, XMIN, XMAX, XLOW, XUPP, ALFA, BETA); INNER=0; inner_terminate = false; while (!inner_terminate) { /* * The subproblem is generated and solved. */ mmasug_(&ITER, &M, &N, &GEPS, &IYFREE[0], &XVAL[0], &XMMA[0], &XMIN[0], &XMAX[0], &XLOW[0], &XUPP[0], &ALFA[0], &BETA[0], &A[0], &B[0], &C[0], &Y[0], &Z, &RAA0, &RAA[0], &ULAM[0], &F0VAL, &FVAL[0], &F0APP, &FAPP[0], &FMAX[0], &DF0DX[0], &DFDX[0], &P[0], &Q[0], &P0[0], &Q0[0], &UU[0], &GRADF[0], &DSRCH[0], &HESSF[0]); /* * The USER should now calculate function values at XMMA. * The result should be put in F0NEW and FNEW. */ std::fill(eval_grads.begin(), eval_grads.end(), false); _feval->_evaluate_wrapper(XMMA, F0NEW, false, DF0DX, FNEW, eval_grads, DFDX); if (INNER >= INNMAX) { libMesh::out << "** Max Inner Iter Reached: Terminating! Inner Iter = " << INNER << std::endl; inner_terminate = true; } else { /* * It is checked if the approximations were conservative. */ conser_( &M, &ICONSE, &GEPS, &F0NEW, &F0APP, &FNEW[0], &FAPP[0]); if (ICONSE == 1) { libMesh::out << "** Conservative Solution: Terminating! Inner Iter = " << INNER << std::endl; inner_terminate = true; } else { /* * The approximations were not conservative, so RAA0 and RAA * are updated and one more inner iteration is started. */ INNER=INNER+1; raaupd_( &M, &N, &GEPS, &XMMA[0], &XVAL[0], &XMIN[0], &XMAX[0], &XLOW[0], &XUPP[0], &F0NEW, &FNEW[0], &F0APP, &FAPP[0], &RAA0, &RAA[0]); } } } /* * The inner iterative process has terminated, which means * that an outer iteration has been completed. * The variables are updated so that XVAL stands for the new * outer iteration point. The fuction values are also updated. */ xupdat_( &N, &ITER, &XMMA[0], &XVAL[0], &XOLD1[0], &XOLD2[0]); fupdat_( &M, &F0NEW, &FNEW[0], &F0VAL, &FVAL[0]); /* * The USER may now write the current solution. */ _feval->_output_wrapper(ITER, XVAL, F0VAL, FVAL, true); f0_iters[(ITE-1)%n_rel_change_iters] = F0VAL; /* * One more outer iteration is started as long as * ITE is less than MAXITE: */ if (ITE == _feval->max_iters()) { libMesh::out << "GCMMA: Reached maximum iterations, terminating! " << std::endl; terminate = true; } // relative change in objective bool rel_change_conv = true; Real f0_curr = f0_iters[n_rel_change_iters-1]; for (unsigned int i=0; i<n_rel_change_iters-1; i++) { if (f0_curr > sqrt(GEPS)) rel_change_conv = (rel_change_conv && fabs(f0_iters[i]-f0_curr)/fabs(f0_curr) < GEPS); else rel_change_conv = (rel_change_conv && fabs(f0_iters[i]-f0_curr) < GEPS); } if (rel_change_conv) { libMesh::out << "GCMMA: Converged relative change tolerance, terminating! " << std::endl; terminate = true; } } #endif //MAST_ENABLE_GCMMA == 1 }
/* Subroutine */ int dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGGSVD computes the generalized singular value decomposition (GSVD) of an M-by-N real matrix A and P-by-N real matrix B: U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) where U, V and Q are orthogonal matrices, and Z' is the transpose of Z. Let K+L = the effective numerical rank of the matrix (A',B')', then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the following structures, respectively: If M-K-L >= 0, K L D1 = K ( I 0 ) L ( 0 C ) M-K-L ( 0 0 ) K L D2 = L ( 0 S ) P-L ( 0 0 ) N-K-L K L ( 0 R ) = K ( 0 R11 R12 ) L ( 0 0 R22 ) where C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), S = diag( BETA(K+1), ... , BETA(K+L) ), C**2 + S**2 = I. R is stored in A(1:K+L,N-K-L+1:N) on exit. If M-K-L < 0, K M-K K+L-M D1 = K ( I 0 0 ) M-K ( 0 C 0 ) K M-K K+L-M D2 = M-K ( 0 S 0 ) K+L-M ( 0 0 I ) P-L ( 0 0 0 ) N-K-L K M-K K+L-M ( 0 R ) = K ( 0 R11 R12 R13 ) M-K ( 0 0 R22 R23 ) K+L-M ( 0 0 0 R33 ) where C = diag( ALPHA(K+1), ... , ALPHA(M) ), S = diag( BETA(K+1), ... , BETA(M) ), C**2 + S**2 = I. (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored ( 0 R22 R23 ) in B(M-K+1:L,N+M-K-L+1:N) on exit. The routine computes C, S, R, and optionally the orthogonal transformation matrices U, V and Q. In particular, if B is an N-by-N nonsingular matrix, then the GSVD of A and B implicitly gives the SVD of A*inv(B): A*inv(B) = U*(D1*inv(D2))*V'. If ( A',B')' has orthonormal columns, then the GSVD of A and B is also equal to the CS decomposition of A and B. Furthermore, the GSVD can be used to derive the solution of the eigenvalue problem: A'*A x = lambda* B'*B x. In some literature, the GSVD of A and B is presented in the form U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) where U and V are orthogonal and X is nonsingular, D1 and D2 are ``diagonal''. The former GSVD form can be converted to the latter form by taking the nonsingular matrix X as X = Q*( I 0 ) ( 0 inv(R) ). Arguments ========= JOBU (input) CHARACTER*1 = 'U': Orthogonal matrix U is computed; = 'N': U is not computed. JOBV (input) CHARACTER*1 = 'V': Orthogonal matrix V is computed; = 'N': V is not computed. JOBQ (input) CHARACTER*1 = 'Q': Orthogonal matrix Q is computed; = 'N': Q is not computed. M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrices A and B. N >= 0. P (input) INTEGER The number of rows of the matrix B. P >= 0. K (output) INTEGER L (output) INTEGER On exit, K and L specify the dimension of the subblocks described in the Purpose section. K + L = effective numerical rank of (A',B')'. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A contains the triangular matrix R, or part of R. See Purpose for details. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) DOUBLE PRECISION array, dimension (LDB,N) On entry, the P-by-N matrix B. On exit, B contains the triangular matrix R if M-K-L < 0. See Purpose for details. LDB (input) INTEGER The leading dimension of the array B. LDA >= max(1,P). ALPHA (output) DOUBLE PRECISION array, dimension (N) BETA (output) DOUBLE PRECISION array, dimension (N) On exit, ALPHA and BETA contain the generalized singular value pairs of A and B; ALPHA(1:K) = 1, BETA(1:K) = 0, and if M-K-L >= 0, ALPHA(K+1:K+L) = C, BETA(K+1:K+L) = S, or if M-K-L < 0, ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 BETA(K+1:M) =S, BETA(M+1:K+L) =1 and ALPHA(K+L+1:N) = 0 BETA(K+L+1:N) = 0 U (output) DOUBLE PRECISION array, dimension (LDU,M) If JOBU = 'U', U contains the M-by-M orthogonal matrix U. If JOBU = 'N', U is not referenced. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,M) if JOBU = 'U'; LDU >= 1 otherwise. V (output) DOUBLE PRECISION array, dimension (LDV,P) If JOBV = 'V', V contains the P-by-P orthogonal matrix V. If JOBV = 'N', V is not referenced. LDV (input) INTEGER The leading dimension of the array V. LDV >= max(1,P) if JOBV = 'V'; LDV >= 1 otherwise. Q (output) DOUBLE PRECISION array, dimension (LDQ,N) If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. If JOBQ = 'N', Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N) if JOBQ = 'Q'; LDQ >= 1 otherwise. WORK (workspace) DOUBLE PRECISION array, dimension (max(3*N,M,P)+N) IWORK (workspace) INTEGER array, dimension (N) INFO (output)INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, the Jacobi-type procedure failed to converge. For further details, see subroutine DTGSJA. Internal Parameters =================== TOLA DOUBLE PRECISION TOLB DOUBLE PRECISION TOLA and TOLB are the thresholds to determine the effective rank of (A',B')'. Generally, they are set to TOLA = MAX(M,N)*norm(A)*MAZHEPS, TOLB = MAX(P,N)*norm(B)*MAZHEPS. The size of TOLA and TOLB may affect the size of backward errors of the decomposition. ===================================================================== Test the input parameters Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1; /* Local variables */ static doublereal tola, tolb, unfl; extern logical lsame_(char *, char *); static doublereal anorm, bnorm; static logical wantq, wantu, wantv; extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dtgsja_(char *, char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); static integer ncycle; extern /* Subroutine */ int xerbla_(char *, integer *), dggsvp_( char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *); static doublereal ulp; #define ALPHA(I) alpha[(I)-1] #define BETA(I) beta[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define U(I,J) u[(I)-1 + ((J)-1)* ( *ldu)] #define V(I,J) v[(I)-1 + ((J)-1)* ( *ldv)] #define Q(I,J) q[(I)-1 + ((J)-1)* ( *ldq)] wantu = lsame_(jobu, "U"); wantv = lsame_(jobv, "V"); wantq = lsame_(jobq, "Q"); *info = 0; if (! (wantu || lsame_(jobu, "N"))) { *info = -1; } else if (! (wantv || lsame_(jobv, "N"))) { *info = -2; } else if (! (wantq || lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*p < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -10; } else if (*ldb < max(1,*p)) { *info = -12; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -16; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -18; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("DGGSVD", &i__1); return 0; } /* Compute the Frobenius norm of matrices A and B */ anorm = dlange_("1", m, n, &A(1,1), lda, &WORK(1)); bnorm = dlange_("1", p, n, &B(1,1), ldb, &WORK(1)); /* Get machine precision and set up threshold for determining the effective numerical rank of the matrices A and B. */ ulp = dlamch_("Precision"); unfl = dlamch_("Safe Minimum"); tola = max(*m,*n) * max(anorm,unfl) * ulp; tolb = max(*p,*n) * max(bnorm,unfl) * ulp; /* Preprocessing */ dggsvp_(jobu, jobv, jobq, m, p, n, &A(1,1), lda, &B(1,1), ldb, & tola, &tolb, k, l, &U(1,1), ldu, &V(1,1), ldv, &Q(1,1), ldq, &IWORK(1), &WORK(1), &WORK(*n + 1), info); /* Compute the GSVD of two upper "triangular" matrices */ dtgsja_(jobu, jobv, jobq, m, p, n, k, l, &A(1,1), lda, &B(1,1), ldb, &tola, &tolb, &ALPHA(1), &BETA(1), &U(1,1), ldu, &V(1,1), ldv, &Q(1,1), ldq, &WORK(1), &ncycle, info); return 0; /* End of DGGSVD */ } /* dggsvd_ */
/* Subroutine */ int zgegs_(char *jobvsl, char *jobvsr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex * work, integer *lwork, doublereal *rwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGEGS computes for a pair of N-by-N complex nonsymmetric matrices A, B: the generalized eigenvalues (alpha, beta), the complex Schur form (A, B), and optionally left and/or right Schur vectors (VSL and VSR). (If only the generalized eigenvalues are needed, use the driver ZGEGV instead.) A generalized eigenvalue for a pair of matrices (A,B) is, roughly speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. A good beginning reference is the book, "VISMatrix Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) The (generalized) Schur form of a pair of matrices is the result of multiplying both matrices on the left by one unitary matrix and both on the right by another unitary matrix, these two unitary matrices being chosen so as to bring the pair of matrices into upper triangular form with the diagonal elements of B being non-negative real numbers (this is also called complex Schur form.) The left and right Schur vectors are the columns of VSL and VSR, respectively, where VSL and VSR are the unitary matrices which reduce A and B to Schur form: Schur form of (A,B) = ( (VSL)**H A (VSR), (VSL)**H B (VSR) ) Arguments ========= JOBVSL (input) CHARACTER*1 = 'N': do not compute the left Schur vectors; = 'V': compute the left Schur vectors. JOBVSR (input) CHARACTER*1 = 'N': do not compute the right Schur vectors; = 'V': compute the right Schur vectors. N (input) INTEGER The order of the matrices A, B, VSL, and VSR. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA, N) On entry, the first of the pair of matrices whose generalized eigenvalues and (optionally) Schur vectors are to be computed. On exit, the generalized Schur form of A. LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) COMPLEX*16 array, dimension (LDB, N) On entry, the second of the pair of matrices whose generalized eigenvalues and (optionally) Schur vectors are to be computed. On exit, the generalized Schur form of B. LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). ALPHA (output) COMPLEX*16 array, dimension (N) BETA (output) COMPLEX*16 array, dimension (N) On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), j=1,...,N are the diagonals of the complex Schur form (A,B) output by ZGEGS. The BETA(j) will be non-negative real. Note: the quotients ALPHA(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, ALPHA will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VSL (output) COMPLEX*16 array, dimension (LDVSL,N) If JOBVSL = 'V', VSL will contain the left Schur vectors. (See "Purpose", above.) Not referenced if JOBVSL = 'N'. LDVSL (input) INTEGER The leading dimension of the matrix VSL. LDVSL >= 1, and if JOBVSL = 'V', LDVSL >= N. VSR (output) COMPLEX*16 array, dimension (LDVSR,N) If JOBVSR = 'V', VSR will contain the right Schur vectors. (See "Purpose", above.) Not referenced if JOBVSR = 'N'. LDVSR (input) INTEGER The leading dimension of the matrix VSR. LDVSR >= 1, and if JOBVSR = 'V', LDVSR >= N. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,2*N). For good performance, LWORK must generally be larger. To compute the optimal value of LWORK, call ILAENV to get blocksizes (for ZGEQRF, ZUNMQR, and CUNGQR.) Then compute: NB -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and CUNGQR; the optimal LWORK is N*(NB+1). RWORK (workspace) DOUBLE PRECISION array, dimension (3*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. =1,...,N: The QZ iteration failed. (A,B) are not in Schur form, but ALPHA(j) and BETA(j) should be correct for j=INFO+1,...,N. > N: errors that usually indicate LAPACK problems: =N+1: error return from ZGGBAL =N+2: error return from ZGEQRF =N+3: error return from ZUNMQR =N+4: error return from ZUNGQR =N+5: error return from ZGGHRD =N+6: error return from ZHGEQZ (other than failed iteration) =N+7: error return from ZGGBAK (computing VSL) =N+8: error return from ZGGBAK (computing VSR) =N+9: error return from ZLASCL (various places) ===================================================================== Decode the input arguments Parameter adjustments Function Body */ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c_n1 = -1; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2, i__3; /* Local variables */ static doublereal anrm, bnrm; static integer itau; extern logical lsame_(char *, char *); static integer ileft, iinfo, icols; static logical ilvsl; static integer iwork; static logical ilvsr; static integer irows; extern doublereal dlamch_(char *); extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublecomplex *, integer *, integer *), zggbal_(char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer * , integer *, doublereal *, doublereal *, doublereal *, integer *); static logical ilascl, ilbscl; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, integer *, doublereal *); static doublereal bignum; static integer ijobvl, iright; extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer * ), zlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublecomplex *, integer *, integer *); static integer ijobvr; extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer * ); static doublereal anrmto; static integer lwkmin; static doublereal bnrmto; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *), zhgeqz_( char *, char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, integer *); static doublereal smlnum; static integer irwork, lwkopt; extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); static integer ihi, ilo; static doublereal eps; #define ALPHA(I) alpha[(I)-1] #define BETA(I) beta[(I)-1] #define WORK(I) work[(I)-1] #define RWORK(I) rwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define VSL(I,J) vsl[(I)-1 + ((J)-1)* ( *ldvsl)] #define VSR(I,J) vsr[(I)-1 + ((J)-1)* ( *ldvsr)] if (lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } /* Test the input arguments Computing MAX */ i__1 = *n << 1; lwkmin = max(i__1,1); lwkopt = lwkmin; *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -11; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -13; } else if (*lwork < lwkmin) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("ZGEGS ", &i__1); return 0; } /* Quick return if possible */ WORK(1).r = (doublereal) lwkopt, WORK(1).i = 0.; if (*n == 0) { return 0; } /* Get machine constants */ eps = dlamch_("E") * dlamch_("B"); safmin = dlamch_("S"); smlnum = *n * safmin / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = zlange_("M", n, n, &A(1,1), lda, &RWORK(1)); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { zlascl_("G", &c_n1, &c_n1, &anrm, &anrmto, n, n, &A(1,1), lda, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = zlange_("M", n, n, &B(1,1), ldb, &RWORK(1)); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { zlascl_("G", &c_n1, &c_n1, &bnrm, &bnrmto, n, n, &B(1,1), ldb, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } /* Permute the matrix to make it more nearly triangular */ ileft = 1; iright = *n + 1; irwork = iright + *n; iwork = 1; zggbal_("P", n, &A(1,1), lda, &B(1,1), ldb, &ilo, &ihi, &RWORK( ileft), &RWORK(iright), &RWORK(irwork), &iinfo); if (iinfo != 0) { *info = *n + 1; goto L10; } /* Reduce B to triangular form, and initialize VSL and/or VSR */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = iwork; iwork = itau + irows; i__1 = *lwork + 1 - iwork; zgeqrf_(&irows, &icols, &B(ilo,ilo), ldb, &WORK(itau), &WORK( iwork), &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__3 = iwork; i__1 = lwkopt, i__2 = (integer) WORK(iwork).r + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 2; goto L10; } i__1 = *lwork + 1 - iwork; zunmqr_("L", "C", &irows, &icols, &irows, &B(ilo,ilo), ldb, & WORK(itau), &A(ilo,ilo), lda, &WORK(iwork), &i__1, & iinfo); if (iinfo >= 0) { /* Computing MAX */ i__3 = iwork; i__1 = lwkopt, i__2 = (integer) WORK(iwork).r + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 3; goto L10; } if (ilvsl) { zlaset_("Full", n, n, &c_b1, &c_b2, &VSL(1,1), ldvsl); i__1 = irows - 1; i__2 = irows - 1; zlacpy_("L", &i__1, &i__2, &B(ilo+1,ilo), ldb, &VSL(ilo+1,ilo), ldvsl); i__1 = *lwork + 1 - iwork; zungqr_(&irows, &irows, &irows, &VSL(ilo,ilo), ldvsl, & WORK(itau), &WORK(iwork), &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__3 = iwork; i__1 = lwkopt, i__2 = (integer) WORK(iwork).r + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 4; goto L10; } } if (ilvsr) { zlaset_("Full", n, n, &c_b1, &c_b2, &VSR(1,1), ldvsr); } /* Reduce to generalized Hessenberg form */ zgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &A(1,1), lda, &B(1,1), ldb, &VSL(1,1), ldvsl, &VSR(1,1), ldvsr, &iinfo); if (iinfo != 0) { *info = *n + 5; goto L10; } /* Perform QZ algorithm, computing Schur vectors if desired */ iwork = itau; i__1 = *lwork + 1 - iwork; zhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &A(1,1), lda, &B(1,1), ldb, &ALPHA(1), &BETA(1), &VSL(1,1), ldvsl, & VSR(1,1), ldvsr, &WORK(iwork), &i__1, &RWORK(irwork), & iinfo); if (iinfo >= 0) { /* Computing MAX */ i__3 = iwork; i__1 = lwkopt, i__2 = (integer) WORK(iwork).r + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { if (iinfo > 0 && iinfo <= *n) { *info = iinfo; } else if (iinfo > *n && iinfo <= *n << 1) { *info = iinfo - *n; } else { *info = *n + 6; } goto L10; } /* Apply permutation to VSL and VSR */ if (ilvsl) { zggbak_("P", "L", n, &ilo, &ihi, &RWORK(ileft), &RWORK(iright), n, & VSL(1,1), ldvsl, &iinfo); if (iinfo != 0) { *info = *n + 7; goto L10; } } if (ilvsr) { zggbak_("P", "R", n, &ilo, &ihi, &RWORK(ileft), &RWORK(iright), n, & VSR(1,1), ldvsr, &iinfo); if (iinfo != 0) { *info = *n + 8; goto L10; } } /* Undo scaling */ if (ilascl) { zlascl_("U", &c_n1, &c_n1, &anrmto, &anrm, n, n, &A(1,1), lda, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } zlascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &ALPHA(1), n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } if (ilbscl) { zlascl_("U", &c_n1, &c_n1, &bnrmto, &bnrm, n, n, &B(1,1), ldb, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } zlascl_("G", &c_n1, &c_n1, &bnrmto, &bnrm, n, &c__1, &BETA(1), n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } L10: WORK(1).r = (doublereal) lwkopt, WORK(1).i = 0.; return 0; /* End of ZGEGS */ } /* zgegs_ */
/* Apply the L-BFGS Strang's two-loop recursion to compute a search direction. */ static opk_status_t apply(opk_vmlmn_t* opt, const opk_vector_t* g) { double sty, yty; opk_index_t j, k; if (opt->mp < 1) { /* Will use the steepest descent direction. */ return OPK_NOT_POSITIVE_DEFINITE; } COPY(opt->d, g); if (opt->method != OPK_VMLMN) { /* Apply the original L-BFGS Strang's two-loop recursion. */ for (j = 1; j <= opt->mp; ++j) { k = slot(opt, j); if (RHO(k) > 0) { BETA(k) = RHO(k)*DOT(opt->d, S(k)); UPDATE(opt->d, -BETA(k), Y(k)); } } if (opt->gamma != 1) { /* Apply initial inverse Hessian approximation. */ SCALE(opt->d, opt->gamma); } for (j = opt->mp; j >= 1; --j) { k = slot(opt, j); if (RHO(k) > 0) { UPDATE(opt->d, BETA(k) - RHO(k)*DOT(opt->d, Y(k)), S(k)); } } } else { /* Apply L-BFGS Strang's two-loop recursion restricted to the subspace of free variables. */ opt->gamma = 0; for (j = 1; j <= opt->mp; ++j) { k = slot(opt, j); sty = WDOT(Y(k), S(k)); if (sty <= 0) { RHO(k) = 0; continue; } RHO(k) = 1/sty; BETA(k) = RHO(k)*WDOT(opt->d, S(k)); UPDATE(opt->d, -BETA(k), Y(k)); if (opt->gamma == 0) { yty = WDOT(Y(k), Y(k)); if (yty > 0) { opt->gamma = sty/yty; } } } if (opt->gamma != 1) { if (opt->gamma <= 0) { /* Force using the steepest descent direction. */ return OPK_NOT_POSITIVE_DEFINITE; } SCALE(opt->d, opt->gamma); } for (j = opt->mp; j >= 1; --j) { k = slot(opt, j); if (RHO(k) > 0) { UPDATE(opt->d, BETA(k) - RHO(k)*WDOT(opt->d, Y(k)), S(k)); } } } if (opt->bounds != 0) { /* Enforce search direction to belong to the subset of the free variables. */ opk_vproduct(opt->d, opt->w, opt->d); } return OPK_SUCCESS; }