/**
 * Least-squares solve of the decorrelated observation system using LAPACK's
 * SVD-based driver dgelss_.
 *
 * The measurements are passed through decorrelate(); the result is copied
 * into lsq_state, which dgelss_ then uses in place as its right-hand side /
 * solution buffer (argument B) with leading dimension
 * MAX(kf->state_dim, kf->obs_dim).  The caller must therefore provide at
 * least that many doubles in lsq_state.
 *
 * \param kf            Supplies obs_dim, state_dim and decor_obs_mtx, which is
 *                      read here as a row-major obs_dim x state_dim matrix.
 * \param measurements  Input handed to decorrelate().
 * \param lsq_state     In/out buffer described above.  Elements 3..5 are
 *                      forced to zero after the solve, so the buffer is
 *                      assumed to hold at least 6 doubles -- TODO confirm
 *                      against callers.
 *
 * The function returns void and so cannot report failure: if the workspace
 * query fails it returns early with lsq_state holding the decorrelated
 * measurements, and a non-zero INFO from the solve itself is not propagated.
 */
static void least_squares_solve(kf_t *kf, double *measurements, double *lsq_state)
{
  double decor_measurements[kf->obs_dim];
  /*VEC_PRINTF(measurements, kf->obs_dim);*/
  decorrelate(kf, measurements, decor_measurements);

  /* LAPACK receives every dimension through an `integer *`.  Keep the values
   * in real `integer` objects instead of casting pointers to s32, which is
   * only valid when the two types happen to have the same width. */
  integer obs_dim = (integer) kf->obs_dim;
  integer state_dim = (integer) kf->state_dim;
  integer nrhs = 1;

  /* dgelss_ expects column-major storage (leading dimension obs_dim) and
   * overwrites its matrix argument, so build a transposed scratch copy. */
  double decor_obs_mtx_transpose[kf->obs_dim * kf->state_dim];
  for (u32 i=0; i<kf->obs_dim; i++) {
    for (u32 j=0; j<kf->state_dim; j++) {
      decor_obs_mtx_transpose[i + j*kf->obs_dim] = kf->decor_obs_mtx[i*kf->state_dim + j];
    }
  }

  /* B enters as the right-hand side and leaves holding the solution. */
  memcpy(lsq_state, decor_measurements, kf->obs_dim * sizeof(double));

  integer ldb = (integer) MAX(kf->state_dim, kf->obs_dim);
  double s[MIN(kf->state_dim, kf->obs_dim)];
  double rcond = 1e-12;
  integer rank;
  double w[1]; //try 25 + 10*num_sats
  integer lwork = -1;
  integer info;

  /* Workspace query: with LWORK = -1 only the optimal size is returned,
   * in w[0]. */
  dgelss_(&obs_dim, &state_dim, &nrhs, //M, N, NRHS
          &decor_obs_mtx_transpose[0], &obs_dim, //A, LDA
          &lsq_state[0], &ldb, //B, LDB
          &s[0], &rcond, // S, RCOND
          &rank, //RANK
          &w[0], &lwork, // WORK, LWORK
          &info); //INFO
  if (info != 0 || w[0] < 1.0) {
    /* w[0] is not trustworthy; using it to size the VLA below would be
     * undefined behaviour. */
    return;
  }
  lwork = (integer) round(w[0]);

  double work[lwork];
  dgelss_(&obs_dim, &state_dim, &nrhs, //M, N, NRHS
          &decor_obs_mtx_transpose[0], &obs_dim, //A, LDA
          &lsq_state[0], &ldb, //B, LDB
          &s[0], &rcond, // S, RCOND
          &rank, //RANK
          &work[0], &lwork, // WORK, LWORK
          &info); //INFO
  (void) info; /* no channel to report a failed solve from a void function */

  memset(&lsq_state[3],0,3 * sizeof(double)); //should already be nearly zero, because this bit of state is independent of the obs
}
// Minimum-norm least-squares solution of vctA * x = vctb through LAPACK's
// dgelss_, using the caller-owned `data` object for the problem sizes and
// scratch buffers.
//
// `data` is rebuilt (after releasing its S and WORK arrays) whenever the
// row/column count of vctA or the column count of vctb differs from the
// sizes stored in it.  dgelss_ is handed pointers directly into vctA, and
// into vctb unless the system is flagged underdetermined, in which case
// data.B is used as the right-hand-side buffer instead.
//
// Returns a data.N x data.NRHS column-major matrix copied from the first
// data.N rows of whichever right-hand-side buffer dgelss_ was given.
vctDynamicMatrix<double> nmrLSMinNorm( vctDynamicMatrix<double>& vctA,
                                       vctDynamicMatrix<double>& vctb,
                                       nmrLSMinNorm::Data& data,
                                       CISSTNETLIB_DOUBLE rcond ){

  // Does the cached description still match this problem?
  const bool sizesMatch = ( data.M    == int(vctA.rows()) &&
                            data.N    == int(vctA.cols()) &&
                            data.NRHS == int(vctb.cols()) );
  if( !sizesMatch ){
    // delete[] on a null pointer is a no-op, so no explicit NULL test
    delete[] data.S;
    delete[] data.WORK;
    data = nmrLSMinNorm::Data( vctA, vctb, rcond );
  }

  // raw storage handed to LAPACK
  CISSTNETLIB_DOUBLE* lhs = vctA.Pointer();
  CISSTNETLIB_DOUBLE* rhs = vctb.Pointer();
  if( data.underdetermined ){
    // underdetermined systems use the buffer held by data
    rhs = data.B.Pointer();
  }

  // solve the LS with minimum norm
  dgelss_( &data.M, &data.N, &data.NRHS,
           lhs, &data.LDA,
           rhs, &data.LDB,
           data.S, &data.RCOND, &data.RANK,
           data.WORK, &data.LWORK,
           &data.INFO );
  data.CheckInfo();

  // gather the N x NRHS solution
  vctDynamicMatrix<double> solution( data.N, data.NRHS, VCT_COL_MAJOR );
  if( !data.underdetermined ){
    for( int col=0; col<data.NRHS; ++col ){
      for( int row=0; row<data.N; ++row ){
        solution[row][col] = vctb[row][col];
      }
    }
  }
  else{
    for( int col=0; col<data.NRHS; ++col ){
      for( int row=0; row<data.N; ++row ){
        solution[row][col] = data.B[row][col];
      }
    }
  }

  return solution;
}
/*! calculate the least-squares-least-norm solution for overdetermined or underdetermined A*x=y using dgelss\n */ inline long dgematrix::dgelss(dgematrix& B, dcovector& S, long& RANK, const double RCOND =-1. ) { #ifdef CPPL_VERBOSE std::cerr << "# [MARK] dgematrix::dgelss(dgematrix&, dcovector&, long& const double)" << std::endl; #endif//CPPL_VERBOSE #ifdef CPPL_DEBUG if(M!=B.M){ std::cerr << "[ERROR] dgematrix::dgelss" << "(dgematrix&, dcovector&, long&, const double) " << std::endl << "These matrix and vector cannot be solved." << std::endl << "Your input was (" << M << "x" << N << ") and (" << B.M << "x" << B.N << ")." << std::endl; exit(1); } #endif//CPPL_DEBUG if(M<N){ //underdetermined dgematrix tmp(N,B.N); for(long i=0; i<B.M; i++){ for(long j=0; j<B.N; j++){ tmp(i,j)=B(i,j); } } B.clear(); swap(B,tmp); } S.resize(min(M,N)); long NRHS(B.N), LDA(M), LDB(B.M), LWORK(3*min(M,N)+max(max(2*min(M,N),max(M,N)), NRHS)), INFO(1); double *WORK(new double[LWORK]); dgelss_(M, N, NRHS, Array, LDA, B.Array, LDB, S.Array, RCOND, RANK, WORK, LWORK, INFO); delete [] WORK; if(INFO!=0){ std::cerr << "[WARNING] dgematrix::dgelss" << "(dgematrix&, docvector&, long, const double) " << "Serious trouble happend. INFO = " << INFO << "." << std::endl; } return INFO; }
// One-shot minimum-norm least-squares solve of vctA * x = vctb through
// LAPACK's dgelss_.
//
// A temporary nmrLSMinNorm::Data is built for this call; its S and WORK
// arrays are released right after dgelss_ returns and before CheckInfo() is
// consulted.  dgelss_ is handed pointers directly into vctA, and into vctb
// unless the system is flagged underdetermined, in which case data.B is the
// right-hand-side buffer.
//
// Returns a data.N x data.NRHS column-major matrix copied from the first
// data.N rows of that buffer.
vctDynamicMatrix<double> nmrLSMinNorm( vctDynamicMatrix<double>& vctA,
                                       vctDynamicMatrix<double>& vctb,
                                       CISSTNETLIB_DOUBLE rcond ){

  // sizes and scratch storage for this call only
  nmrLSMinNorm::Data data( vctA, vctb, rcond );

  // raw storage handed to LAPACK
  CISSTNETLIB_DOUBLE* matrixStorage = vctA.Pointer();
  CISSTNETLIB_DOUBLE* rhsStorage = vctb.Pointer();
  if( data.underdetermined ){
    rhsStorage = data.B.Pointer();
  }

  // solve the LS with minimum norm
  dgelss_( &data.M, &data.N, &data.NRHS,
           matrixStorage, &data.LDA,
           rhsStorage, &data.LDB,
           data.S, &data.RCOND, &data.RANK,
           data.WORK, &data.LWORK,
           &data.INFO );

  // scratch arrays are not needed past this point
  delete[] data.S;
  delete[] data.WORK;

  data.CheckInfo();

  // gather the N x NRHS solution
  vctDynamicMatrix<double> answer( data.N, data.NRHS, VCT_COL_MAJOR );
  for( int col=0; col<data.NRHS; ++col ){
    for( int row=0; row<data.N; ++row ){
      if( data.underdetermined ){ answer[row][col] = data.B[row][col]; }
      else                      { answer[row][col] = vctb[row][col]; }
    }
  }

  return answer;
}
/*
 * Row-major convenience driver around LAPACK's dgelss_.
 *
 *   A     m x n matrix, row-major (not modified; a column-major copy is made)
 *   b     m x nrhs right-hand sides, row-major (not modified)
 *   x     n x nrhs output, row-major
 *   m, n, nrhs  problem dimensions; only m >= n is supported
 *
 * dgelss_ is called with rcond = -1.0.  Errors (unsupported shape, negative
 * size, allocation failure, non-zero INFO) are reported with printf, matching
 * the existing m < n diagnostic, and leave x untouched.  An empty problem
 * (n == 0 or nrhs == 0) returns immediately because x has no elements.
 */
void dgelss_driver(double *A, double *b, double *x, int m, int n, int nrhs)
{
    double *Atmp = NULL;
    double *btmp = NULL;
    double *s = NULL;     /* Output array */
    double *work = NULL;
    double rcond = -1.0;
    int lda = m;
    int ldb = m;
    int rank;             /* Output */
    int lwork;
    int info = 0;
    int i, j;

    if (m < n) {
        printf("Error: driver now only works when m >= n\n");
        return;
    }
    if (n < 0 || nrhs < 0) {
        printf("Error: driver requires non-negative n and nrhs\n");
        return;
    }
    if (n == 0 || nrhs == 0) {
        return; /* nothing to solve: x is empty */
    }

    /* From here on m >= n >= 1 and nrhs >= 1, so every request below is
     * non-empty and a NULL result always means allocation failure. */
    lwork = 16 * (3 * MIN(m, n) + MAX(MAX(2 * MIN(m, n), MAX(m, n)), nrhs));

    Atmp = (double *)malloc(sizeof(double) * m * n);
    btmp = (double *)malloc(sizeof(double) * m * nrhs);
    s    = (double *)malloc(sizeof(double) * n);
    work = (double *)malloc(sizeof(double) * lwork);

    if (Atmp == NULL || btmp == NULL || s == NULL || work == NULL) {
        printf("Error: driver could not allocate workspace\n");
    } else {
        /* Go from row- to column-major */
        for (i = 0; i < m; i++)
            for (j = 0; j < n; j++)
                Atmp[j * m + i] = A[i * n + j];

        for (i = 0; i < m; i++)
            for (j = 0; j < nrhs; j++)
                btmp[j * m + i] = b[i * nrhs + j];

        /* Make the FORTRAN call */
        dgelss_(&m, &n, &nrhs, Atmp, &lda, btmp, &ldb,
                s, &rcond, &rank, work, &lwork, &info);

        if (info != 0) {
            /* btmp does not hold a valid solution; do not copy it out */
            printf("Error: dgelss_ failed with info = %d\n", info);
        } else {
            /* Go from column- to row-major */
            for (i = 0; i < n; i++)
                for (j = 0; j < nrhs; j++)
                    x[i * nrhs + j] = btmp[j * m + i];
        }
    }

    /* free(NULL) is a no-op, so one cleanup path serves every outcome */
    free(Atmp);
    free(btmp);
    free(s);
    free(work);
}
// Set up the sizes and scratch storage for a dgelss_ solve of A * x = b.
//
// Records M, N and NRHS from the operands, takes LDA = M and
// LDB = max(M, N), allocates S with min(M, N) entries, then asks dgelss_
// (LWORK = -1) for its preferred workspace length and allocates WORK
// accordingly.  When M < N the system is flagged underdetermined and B is
// sized LDB x NRHS (column-major) with its first M rows copied from b.
//
// S and WORK are allocated with new[] here; nothing in this constructor
// releases them.
nmrLSMinNorm::Data::Data( const vctDynamicMatrix<double>& A,
                          const vctDynamicMatrix<double>& b,
                          double rcond ) :
  M( A.rows() ),
  N( A.cols() ),
  NRHS( b.cols() ),
  LDA( M ),
  LDB( (N>M) ? N : M ),
  S( new CISSTNETLIB_DOUBLE[ (N>M) ? M : N ] ),
  RCOND( rcond ),
  WORK( NULL ),
  LWORK( -1 ),            // -1 requests a workspace-size query
  INFO( 0 ),
  underdetermined( M<N ){

  CheckSystem( A, b );

  // Size query: the preferred workspace length comes back in `wanted`.
  CISSTNETLIB_DOUBLE wanted;
  dgelss_( &M, &N, &NRHS,
           NULL, &LDA,
           NULL, &LDB,
           S, &RCOND, &RANK,
           &wanted, &LWORK,
           &INFO );

  LWORK = wanted;                          // keep the reported size
  WORK = new CISSTNETLIB_DOUBLE[LWORK];    // and allocate that much

  // An underdetermined system (M<N) needs a right-hand side with LDB rows.
  if( !underdetermined ){ return; }

  B.SetSize( LDB, NRHS, VCT_COL_MAJOR );
  for( int col=0; col<NRHS; ++col ){
    for( int row=0; row<M; ++row ){
      B[row][col] = b[row][col];
    }
  }
}
// Double-precision gelss entry point: passes every argument through to
// dgelss_ unchanged and returns whatever dgelss_ returns.
GURLS_EXPORT int gelss( int *m, int *n, int* nrhs, double *a, int *lda, double* b, int *ldb, double *s, double *rcond, int *rank, double *work, int *lwork, int *info)
{
    const int status = dgelss_( m, n, nrhs,
                                a, lda,
                                b, ldb,
                                s, rcond, rank,
                                work, lwork,
                                info);
    return status;
}
/*
 * derrls_ -- error-exit test for the double precision least squares drivers.
 *
 * When the two characters at path[1..2] compare equal to "LS" (via lsamen_),
 * each of dgels_, dgelss_, dgelsx_, dgelsy_ and dgelsd_ is called several
 * times.  Before every call infoc_1.infot is set, and chkxer_ is invoked
 * right after it with infot/nout/lerr/ok from the infoc_1 common block.
 * alaesm_ prints a summary at the end.  Always returns 0.
 *
 * NOTE(review): the c__N / c_n1 constants and the infoc_1 / srnamc_1 common
 * blocks are defined outside this function.  By f2c convention c__2 is 2 and
 * c_n1 is -1, and infot names the 1-based argument each call is expected to
 * have rejected -- confirm against the file-level definitions.
 * NOTE(review): b and rcond are passed without being initialised here;
 * presumably each call fails its argument checks before reading them.
 */
/* Subroutine */ int derrls_(char *path, integer *nunit)
{
    /* Builtin functions */
    integer s_wsle(cilist *), e_wsle(void);
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    doublereal a[4] /* was [2][2] */, b[4] /* was [2][2] */, s[2], w[2];
    char c2[2];
    integer ip[2], info, irnk;
    extern /* Subroutine */ int dgels_(char *, integer *, integer *, integer *
            , doublereal *, integer *, doublereal *, integer *, doublereal *,
            integer *, integer *);
    doublereal rcond;
    extern /* Subroutine */ int alaesm_(char *, logical *, integer *),
            dgelsd_(integer *, integer *, integer *, doublereal *, integer *,
            doublereal *, integer *, doublereal *, doublereal *, integer *,
            doublereal *, integer *, integer *, integer *);
    extern logical lsamen_(integer *, char *, char *);
    extern /* Subroutine */ int dgelss_(integer *, integer *, integer *,
            doublereal *, integer *, doublereal *, integer *, doublereal *,
            doublereal *, integer *, doublereal *, integer *, integer *),
            chkxer_(char *, integer *, integer *, logical *, logical *),
            dgelsx_(integer *, integer *, integer *, doublereal *, integer *,
            doublereal *, integer *, integer *, doublereal *, integer *,
            doublereal *, integer *), dgelsy_(integer *, integer *, integer *,
            doublereal *, integer *, doublereal *, integer *, integer *,
            doublereal *, integer *, doublereal *, integer *, integer *);

    /* Fortran I/O blocks */
    static cilist io___1 = { 0, 0, 0, 0, 0 };

/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DERRLS tests the error exits for the DOUBLE PRECISION least squares */
/*  driver routines (DGELS, DGELSS, DGELSX, DGELSY, DGELSD). */

/*  Arguments */
/*  ========= */

/*  PATH    (input) CHARACTER*3 */
/*          The LAPACK path name for the routines to be tested. */

/*  NUNIT   (input) INTEGER */
/*          The unit number for output. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Route output to the requested unit and issue one list-directed write
       with no items. */
    infoc_1.nout = *nunit;
    io___1.ciunit = infoc_1.nout;
    s_wsle(&io___1);
    e_wsle();
    /* c2 = the two characters following the first character of PATH */
    s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2);
    a[0] = 1.;
    a[2] = 2.;
    a[3] = 3.;
    a[1] = 4.;
    infoc_1.ok = TRUE_;

    if (lsamen_(&c__2, c2, "LS")) {

/*        Test error exits for the least squares driver routines. */

/*        DGELS */

        s_copy(srnamc_1.srnamt, "DGELS ", (ftnlen)6, (ftnlen)6);
        infoc_1.infot = 1;
        dgels_("/", &c__0, &c__0, &c__0, a, &c__1, b, &c__1, w, &c__1, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 2;
        dgels_("N", &c_n1, &c__0, &c__0, a, &c__1, b, &c__1, w, &c__1, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 3;
        dgels_("N", &c__0, &c_n1, &c__0, a, &c__1, b, &c__1, w, &c__1, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 4;
        dgels_("N", &c__0, &c__0, &c_n1, a, &c__1, b, &c__1, w, &c__1, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 6;
        dgels_("N", &c__2, &c__0, &c__0, a, &c__1, b, &c__2, w, &c__2, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 8;
        dgels_("N", &c__2, &c__0, &c__0, a, &c__2, b, &c__1, w, &c__2, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 10;
        dgels_("N", &c__1, &c__1, &c__0, a, &c__1, b, &c__1, w, &c__1, &info);
        chkxer_("DGELS ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);

/*        DGELSS */

        s_copy(srnamc_1.srnamt, "DGELSS", (ftnlen)6, (ftnlen)6);
        infoc_1.infot = 1;
        dgelss_(&c_n1, &c__0, &c__0, a, &c__1, b, &c__1, s, &rcond, &irnk, w,
                &c__1, &info);
        chkxer_("DGELSS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 2;
        dgelss_(&c__0, &c_n1, &c__0, a, &c__1, b, &c__1, s, &rcond, &irnk, w,
                &c__1, &info);
        chkxer_("DGELSS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 3;
        dgelss_(&c__0, &c__0, &c_n1, a, &c__1, b, &c__1, s, &rcond, &irnk, w,
                &c__1, &info);
        chkxer_("DGELSS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 5;
        dgelss_(&c__2, &c__0, &c__0, a, &c__1, b, &c__2, s, &rcond, &irnk, w,
                &c__2, &info);
        chkxer_("DGELSS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 7;
        dgelss_(&c__2, &c__0, &c__0, a, &c__2, b, &c__1, s, &rcond, &irnk, w,
                &c__2, &info);
        chkxer_("DGELSS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);

/*        DGELSX */

        s_copy(srnamc_1.srnamt, "DGELSX", (ftnlen)6, (ftnlen)6);
        infoc_1.infot = 1;
        dgelsx_(&c_n1, &c__0, &c__0, a, &c__1, b, &c__1, ip, &rcond, &irnk, w,
                &info);
        chkxer_("DGELSX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 2;
        dgelsx_(&c__0, &c_n1, &c__0, a, &c__1, b, &c__1, ip, &rcond, &irnk, w,
                &info);
        chkxer_("DGELSX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 3;
        dgelsx_(&c__0, &c__0, &c_n1, a, &c__1, b, &c__1, ip, &rcond, &irnk, w,
                &info);
        chkxer_("DGELSX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 5;
        dgelsx_(&c__2, &c__0, &c__0, a, &c__1, b, &c__2, ip, &rcond, &irnk, w,
                &info);
        chkxer_("DGELSX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 7;
        dgelsx_(&c__2, &c__0, &c__0, a, &c__2, b, &c__1, ip, &rcond, &irnk, w,
                &info);
        chkxer_("DGELSX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);

/*        DGELSY */

        s_copy(srnamc_1.srnamt, "DGELSY", (ftnlen)6, (ftnlen)6);
        infoc_1.infot = 1;
        dgelsy_(&c_n1, &c__0, &c__0, a, &c__1, b, &c__1, ip, &rcond, &irnk, w,
                &c__10, &info);
        chkxer_("DGELSY", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 2;
        dgelsy_(&c__0, &c_n1, &c__0, a, &c__1, b, &c__1, ip, &rcond, &irnk, w,
                &c__10, &info);
        chkxer_("DGELSY", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 3;
        dgelsy_(&c__0, &c__0, &c_n1, a, &c__1, b, &c__1, ip, &rcond, &irnk, w,
                &c__10, &info);
        chkxer_("DGELSY", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 5;
        dgelsy_(&c__2, &c__0, &c__0, a, &c__1, b, &c__2, ip, &rcond, &irnk, w,
                &c__10, &info);
        chkxer_("DGELSY", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 7;
        dgelsy_(&c__2, &c__0, &c__0, a, &c__2, b, &c__1, ip, &rcond, &irnk, w,
                &c__10, &info);
        chkxer_("DGELSY", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 12;
        dgelsy_(&c__2, &c__2, &c__1, a, &c__2, b, &c__2, ip, &rcond, &irnk, w,
                &c__1, &info);
        chkxer_("DGELSY", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);

/*        DGELSD */

        s_copy(srnamc_1.srnamt, "DGELSD", (ftnlen)6, (ftnlen)6);
        infoc_1.infot = 1;
        dgelsd_(&c_n1, &c__0, &c__0, a, &c__1, b, &c__1, s, &rcond, &irnk, w,
                &c__10, ip, &info);
        chkxer_("DGELSD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 2;
        dgelsd_(&c__0, &c_n1, &c__0, a, &c__1, b, &c__1, s, &rcond, &irnk, w,
                &c__10, ip, &info);
        chkxer_("DGELSD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 3;
        dgelsd_(&c__0, &c__0, &c_n1, a, &c__1, b, &c__1, s, &rcond, &irnk, w,
                &c__10, ip, &info);
        chkxer_("DGELSD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 5;
        dgelsd_(&c__2, &c__0, &c__0, a, &c__1, b, &c__2, s, &rcond, &irnk, w,
                &c__10, ip, &info);
        chkxer_("DGELSD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 7;
        dgelsd_(&c__2, &c__0, &c__0, a, &c__2, b, &c__1, s, &rcond, &irnk, w,
                &c__10, ip, &info);
        chkxer_("DGELSD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
        infoc_1.infot = 12;
        dgelsd_(&c__2, &c__2, &c__1, a, &c__2, b, &c__2, s, &rcond, &irnk, w,
                &c__1, ip, &info);
        chkxer_("DGELSD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
                infoc_1.ok);
    }

/*     Print a summary line. */

    alaesm_(path, &infoc_1.ok, &infoc_1.nout);

    return 0;

/*     End of DERRLS */

} /* derrls_ */
/* Subroutine */ int ddrvls_(logical *dotype, integer *nm, integer *mval, integer *nn, integer *nval, integer *nns, integer *nsval, integer * nnb, integer *nbval, integer *nxval, doublereal *thresh, logical * tsterr, doublereal *a, doublereal *copya, doublereal *b, doublereal * copyb, doublereal *c__, doublereal *s, doublereal *copys, doublereal * work, integer *iwork, integer *nout) { /* Initialized data */ static integer iseedy[4] = { 1988,1989,1990,1991 }; /* Format strings */ static char fmt_9999[] = "(\002 TRANS='\002,a1,\002', M=\002,i5,\002, N" "=\002,i5,\002, NRHS=\002,i4,\002, NB=\002,i4,\002, type\002,i2" ",\002, test(\002,i2,\002)=\002,g12.5)"; static char fmt_9998[] = "(\002 M=\002,i5,\002, N=\002,i5,\002, NRHS=" "\002,i4,\002, NB=\002,i4,\002, type\002,i2,\002, test(\002,i2" ",\002)=\002,g12.5)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); double sqrt(doublereal), log(doublereal); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer info; static char path[3]; static integer rank, nrhs, nlvl, nrun, i__, j, k; extern /* Subroutine */ int alahd_(integer *, char *); static integer m, n; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static integer nfail, iseed[4]; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer crank; extern /* Subroutine */ int dgels_(char *, integer *, integer *, integer * , doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); static integer irank; static doublereal rcond; extern doublereal dasum_(integer *, doublereal *, integer *); static integer itran, mnmin, ncols; static doublereal norma, normb; extern doublereal 
dqrt12_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dqrt14_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dqrt17_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static char trans[1]; static integer nerrs, itype; extern /* Subroutine */ int dqrt13_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static integer lwork; extern /* Subroutine */ int dqrt15_(integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dqrt16_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); static integer nrows, lwlsy, nb, im, in; extern doublereal dlamch_(char *); extern /* Subroutine */ int alaerh_(char *, char *, integer *, integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer iscale; extern /* Subroutine */ int dgelsd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), dgelss_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), alasvm_(char *, integer *, integer *, integer *, integer *), dgelsx_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal 
*, integer *, doublereal *, integer *), dgelsy_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), dlarnv_(integer *, integer *, integer *, doublereal *), derrls_(char *, integer *), xlaenv_(integer *, integer *); static integer ldwork; static doublereal result[18]; static integer lda, ldb, inb; static doublereal eps; static integer ins; /* Fortran I/O blocks */ static cilist io___35 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___42 = { 0, 0, 0, fmt_9998, 0 }; /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University January 3, 2000 Purpose ======= DDRVLS tests the least squares driver routines DGELS, DGELSS, DGELSX, DGELSY and DGELSD. Arguments ========= DOTYPE (input) LOGICAL array, dimension (NTYPES) The matrix types to be used for testing. Matrices of type j (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. The matrix of type j is generated as follows: j=1: A = U*D*V where U and V are random orthogonal matrices and D has random entries (> 0.1) taken from a uniform distribution (0,1). A is full rank. j=2: The same of 1, but A is scaled up. j=3: The same of 1, but A is scaled down. j=4: A = U*D*V where U and V are random orthogonal matrices and D has 3*min(M,N)/4 random entries (> 0.1) taken from a uniform distribution (0,1) and the remaining entries set to 0. A is rank-deficient. j=5: The same of 4, but A is scaled up. j=6: The same of 5, but A is scaled down. NM (input) INTEGER The number of values of M contained in the vector MVAL. MVAL (input) INTEGER array, dimension (NM) The values of the matrix row dimension M. NN (input) INTEGER The number of values of N contained in the vector NVAL. 
NVAL (input) INTEGER array, dimension (NN) The values of the matrix column dimension N. NNS (input) INTEGER The number of values of NRHS contained in the vector NSVAL. NSVAL (input) INTEGER array, dimension (NNS) The values of the number of right hand sides NRHS. NNB (input) INTEGER The number of values of NB and NX contained in the vectors NBVAL and NXVAL. The blocking parameters are used in pairs (NB,NX). NBVAL (input) INTEGER array, dimension (NNB) The values of the blocksize NB. NXVAL (input) INTEGER array, dimension (NNB) The values of the crossover point NX. THRESH (input) DOUBLE PRECISION The threshold value for the test ratios. A result is included in the output file if RESULT >= THRESH. To have every test ratio printed, use THRESH = 0. TSTERR (input) LOGICAL Flag that indicates whether error exits are to be tested. A (workspace) DOUBLE PRECISION array, dimension (MMAX*NMAX) where MMAX is the maximum value of M in MVAL and NMAX is the maximum value of N in NVAL. COPYA (workspace) DOUBLE PRECISION array, dimension (MMAX*NMAX) B (workspace) DOUBLE PRECISION array, dimension (MMAX*NSMAX) where MMAX is the maximum value of M in MVAL and NSMAX is the maximum value of NRHS in NSVAL. COPYB (workspace) DOUBLE PRECISION array, dimension (MMAX*NSMAX) C (workspace) DOUBLE PRECISION array, dimension (MMAX*NSMAX) S (workspace) DOUBLE PRECISION array, dimension (min(MMAX,NMAX)) COPYS (workspace) DOUBLE PRECISION array, dimension (min(MMAX,NMAX)) WORK (workspace) DOUBLE PRECISION array, dimension (MMAX*NMAX + 4*NMAX + MMAX). IWORK (workspace) INTEGER array, dimension (15*NMAX) NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --iwork; --work; --copys; --s; --c__; --copyb; --b; --copya; --a; --nxval; --nbval; --nsval; --nval; --mval; --dotype; /* Function Body Initialize constants and the random number seed. 
*/ s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16); s_copy(path + 1, "LS", (ftnlen)2, (ftnlen)2); nrun = 0; nfail = 0; nerrs = 0; for (i__ = 1; i__ <= 4; ++i__) { iseed[i__ - 1] = iseedy[i__ - 1]; /* L10: */ } eps = dlamch_("Epsilon"); /* Threshold for rank estimation */ rcond = sqrt(eps) - (sqrt(eps) - eps) / 2; /* Test the error exits */ if (*tsterr) { derrls_(path, nout); } /* Print the header if NM = 0 or NN = 0 and THRESH = 0. */ if ((*nm == 0 || *nn == 0) && *thresh == 0.) { alahd_(nout, path); } infoc_1.infot = 0; xlaenv_(&c__2, &c__2); xlaenv_(&c__9, &c__25); i__1 = *nm; for (im = 1; im <= i__1; ++im) { m = mval[im]; lda = max(1,m); i__2 = *nn; for (in = 1; in <= i__2; ++in) { n = nval[in]; mnmin = min(m,n); /* Computing MAX */ i__3 = max(1,m); ldb = max(i__3,n); i__3 = *nns; for (ins = 1; ins <= i__3; ++ins) { nrhs = nsval[ins]; /* Computing MAX Computing MAX */ d__1 = 1., d__2 = (doublereal) mnmin; i__4 = (integer) (log(max(d__1,d__2) / 26.) / log(2.)) + 1; nlvl = max(i__4,0); /* Computing MAX */ i__4 = 1, i__5 = (m + nrhs) * (n + 2), i__4 = max(i__4,i__5), i__5 = (n + nrhs) * (m + 2), i__4 = max(i__4,i__5), i__5 = m * n + (mnmin << 2) + max(m,n), i__4 = max( i__4,i__5), i__5 = mnmin * 12 + mnmin * 50 + (mnmin << 3) * nlvl + mnmin * nrhs + 676; lwork = max(i__4,i__5); for (irank = 1; irank <= 2; ++irank) { for (iscale = 1; iscale <= 3; ++iscale) { itype = (irank - 1) * 3 + iscale; if (! 
dotype[itype]) { goto L110; } if (irank == 1) { /* Test DGELS Generate a matrix of scaling type ISCALE */ dqrt13_(&iscale, &m, &n, ©a[1], &lda, &norma, iseed); i__4 = *nnb; for (inb = 1; inb <= i__4; ++inb) { nb = nbval[inb]; xlaenv_(&c__1, &nb); xlaenv_(&c__3, &nxval[inb]); for (itran = 1; itran <= 2; ++itran) { if (itran == 1) { *(unsigned char *)trans = 'N'; nrows = m; ncols = n; } else { *(unsigned char *)trans = 'T'; nrows = n; ncols = m; } ldwork = max(1,ncols); /* Set up a consistent rhs */ if (ncols > 0) { i__5 = ncols * nrhs; dlarnv_(&c__2, iseed, &i__5, &work[1]) ; i__5 = ncols * nrhs; d__1 = 1. / (doublereal) ncols; dscal_(&i__5, &d__1, &work[1], &c__1); } dgemm_(trans, "No transpose", &nrows, & nrhs, &ncols, &c_b24, ©a[1], & lda, &work[1], &ldwork, &c_b25, & b[1], &ldb) ; dlacpy_("Full", &nrows, &nrhs, &b[1], & ldb, ©b[1], &ldb); /* Solve LS or overdetermined system */ if (m > 0 && n > 0) { dlacpy_("Full", &m, &n, ©a[1], & lda, &a[1], &lda); dlacpy_("Full", &nrows, &nrhs, ©b[ 1], &ldb, &b[1], &ldb); } s_copy(srnamc_1.srnamt, "DGELS ", (ftnlen) 6, (ftnlen)6); dgels_(trans, &m, &n, &nrhs, &a[1], &lda, &b[1], &ldb, &work[1], &lwork, & info); if (info != 0) { alaerh_(path, "DGELS ", &info, &c__0, trans, &m, &n, &nrhs, &c_n1, & nb, &itype, &nfail, &nerrs, nout); } /* Check correctness of results */ ldwork = max(1,nrows); if (nrows > 0 && nrhs > 0) { dlacpy_("Full", &nrows, &nrhs, ©b[ 1], &ldb, &c__[1], &ldb); } dqrt16_(trans, &m, &n, &nrhs, ©a[1], & lda, &b[1], &ldb, &c__[1], &ldb, & work[1], result); if (itran == 1 && m >= n || itran == 2 && m < n) { /* Solving LS system */ result[1] = dqrt17_(trans, &c__1, &m, &n, &nrhs, ©a[1], &lda, & b[1], &ldb, ©b[1], &ldb, & c__[1], &work[1], &lwork); } else { /* Solving overdetermined system */ result[1] = dqrt14_(trans, &m, &n, & nrhs, ©a[1], &lda, &b[1], &ldb, &work[1], &lwork); } /* Print information about the tests that did not pass the threshold. 
*/ for (k = 1; k <= 2; ++k) { if (result[k - 1] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___35.ciunit = *nout; s_wsfe(&io___35); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&nrhs, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&nb, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&itype, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&result[k - 1], (ftnlen)sizeof( doublereal)); e_wsfe(); ++nfail; } /* L20: */ } nrun += 2; /* L30: */ } /* L40: */ } } /* Generate a matrix of scaling type ISCALE and rank type IRANK. */ dqrt15_(&iscale, &irank, &m, &n, &nrhs, ©a[1], & lda, ©b[1], &ldb, ©s[1], &rank, & norma, &normb, iseed, &work[1], &lwork); /* workspace used: MAX(M+MIN(M,N),NRHS*MIN(M,N),2*N+M) Initialize vector IWORK. */ i__4 = n; for (j = 1; j <= i__4; ++j) { iwork[j] = 0; /* L50: */ } ldwork = max(1,m); /* Test DGELSX DGELSX: Compute the minimum-norm solution X to min( norm( A * X - B ) ) using a complete orthogonal factorization. 
*/ dlacpy_("Full", &m, &n, ©a[1], &lda, &a[1], &lda); dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &b[1], & ldb); s_copy(srnamc_1.srnamt, "DGELSX", (ftnlen)6, (ftnlen) 6); dgelsx_(&m, &n, &nrhs, &a[1], &lda, &b[1], &ldb, & iwork[1], &rcond, &crank, &work[1], &info); if (info != 0) { alaerh_(path, "DGELSX", &info, &c__0, " ", &m, &n, &nrhs, &c_n1, &nb, &itype, &nfail, & nerrs, nout); } /* workspace used: MAX( MNMIN+3*N, 2*MNMIN+NRHS ) Test 3: Compute relative error in svd workspace: M*N + 4*MIN(M,N) + MAX(M,N) */ result[2] = dqrt12_(&crank, &crank, &a[1], &lda, & copys[1], &work[1], &lwork); /* Test 4: Compute error in solution workspace: M*NRHS + M */ dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &work[1], &ldwork); dqrt16_("No transpose", &m, &n, &nrhs, ©a[1], & lda, &b[1], &ldb, &work[1], &ldwork, &work[m * nrhs + 1], &result[3]); /* Test 5: Check norm of r'*A workspace: NRHS*(M+N) */ result[4] = 0.; if (m > crank) { result[4] = dqrt17_("No transpose", &c__1, &m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, & copyb[1], &ldb, &c__[1], &work[1], &lwork); } /* Test 6: Check if x is in the rowspace of A workspace: (M+NRHS)*(N+2) */ result[5] = 0.; if (n > crank) { result[5] = dqrt14_("No transpose", &m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, &work[1], & lwork); } /* Print information about the tests that did not pass the threshold. */ for (k = 3; k <= 6; ++k) { if (result[k - 1] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___40.ciunit = *nout; s_wsfe(&io___40); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&nrhs, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&nb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&itype, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&result[k - 1], (ftnlen) sizeof(doublereal)); e_wsfe(); ++nfail; } /* L60: */ } nrun += 4; /* Loop for testing different block sizes. 
*/ i__4 = *nnb; for (inb = 1; inb <= i__4; ++inb) { nb = nbval[inb]; xlaenv_(&c__1, &nb); xlaenv_(&c__3, &nxval[inb]); /* Test DGELSY DGELSY: Compute the minimum-norm solution X to min( norm( A * X - B ) ) using the rank-revealing orthogonal factorization. Initialize vector IWORK. */ i__5 = n; for (j = 1; j <= i__5; ++j) { iwork[j] = 0; /* L70: */ } /* Set LWLSY to the adequate value. Computing MAX */ i__5 = 1, i__6 = mnmin + (n << 1) + nb * (n + 1), i__5 = max(i__5,i__6), i__6 = (mnmin << 1) + nb * nrhs; lwlsy = max(i__5,i__6); dlacpy_("Full", &m, &n, ©a[1], &lda, &a[1], & lda); dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &b[1], &ldb); s_copy(srnamc_1.srnamt, "DGELSY", (ftnlen)6, ( ftnlen)6); dgelsy_(&m, &n, &nrhs, &a[1], &lda, &b[1], &ldb, & iwork[1], &rcond, &crank, &work[1], & lwlsy, &info); if (info != 0) { alaerh_(path, "DGELSY", &info, &c__0, " ", &m, &n, &nrhs, &c_n1, &nb, &itype, & nfail, &nerrs, nout); } /* Test 7: Compute relative error in svd workspace: M*N + 4*MIN(M,N) + MAX(M,N) */ result[6] = dqrt12_(&crank, &crank, &a[1], &lda, & copys[1], &work[1], &lwork); /* Test 8: Compute error in solution workspace: M*NRHS + M */ dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &work[ 1], &ldwork); dqrt16_("No transpose", &m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, &work[1], &ldwork, & work[m * nrhs + 1], &result[7]); /* Test 9: Check norm of r'*A workspace: NRHS*(M+N) */ result[8] = 0.; if (m > crank) { result[8] = dqrt17_("No transpose", &c__1, &m, &n, &nrhs, ©a[1], &lda, &b[1], & ldb, ©b[1], &ldb, &c__[1], &work[ 1], &lwork); } /* Test 10: Check if x is in the rowspace of A workspace: (M+NRHS)*(N+2) */ result[9] = 0.; if (n > crank) { result[9] = dqrt14_("No transpose", &m, &n, & nrhs, ©a[1], &lda, &b[1], &ldb, & work[1], &lwork); } /* Test DGELSS DGELSS: Compute the minimum-norm solution X to min( norm( A * X - B ) ) using the SVD. 
*/ dlacpy_("Full", &m, &n, ©a[1], &lda, &a[1], & lda); dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &b[1], &ldb); s_copy(srnamc_1.srnamt, "DGELSS", (ftnlen)6, ( ftnlen)6); dgelss_(&m, &n, &nrhs, &a[1], &lda, &b[1], &ldb, & s[1], &rcond, &crank, &work[1], &lwork, & info); if (info != 0) { alaerh_(path, "DGELSS", &info, &c__0, " ", &m, &n, &nrhs, &c_n1, &nb, &itype, & nfail, &nerrs, nout); } /* workspace used: 3*min(m,n) + max(2*min(m,n),nrhs,max(m,n)) Test 11: Compute relative error in svd */ if (rank > 0) { daxpy_(&mnmin, &c_b92, ©s[1], &c__1, &s[1] , &c__1); result[10] = dasum_(&mnmin, &s[1], &c__1) / dasum_(&mnmin, ©s[1], &c__1) / ( eps * (doublereal) mnmin); } else { result[10] = 0.; } /* Test 12: Compute error in solution */ dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &work[ 1], &ldwork); dqrt16_("No transpose", &m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, &work[1], &ldwork, & work[m * nrhs + 1], &result[11]); /* Test 13: Check norm of r'*A */ result[12] = 0.; if (m > crank) { result[12] = dqrt17_("No transpose", &c__1, & m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, ©b[1], &ldb, &c__[1], &work[ 1], &lwork); } /* Test 14: Check if x is in the rowspace of A */ result[13] = 0.; if (n > crank) { result[13] = dqrt14_("No transpose", &m, &n, & nrhs, ©a[1], &lda, &b[1], &ldb, & work[1], &lwork); } /* Test DGELSD DGELSD: Compute the minimum-norm solution X to min( norm( A * X - B ) ) using a divide and conquer SVD. Initialize vector IWORK. 
*/ i__5 = n; for (j = 1; j <= i__5; ++j) { iwork[j] = 0; /* L80: */ } dlacpy_("Full", &m, &n, ©a[1], &lda, &a[1], & lda); dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &b[1], &ldb); s_copy(srnamc_1.srnamt, "DGELSD", (ftnlen)6, ( ftnlen)6); dgelsd_(&m, &n, &nrhs, &a[1], &lda, &b[1], &ldb, & s[1], &rcond, &crank, &work[1], &lwork, & iwork[1], &info); if (info != 0) { alaerh_(path, "DGELSD", &info, &c__0, " ", &m, &n, &nrhs, &c_n1, &nb, &itype, & nfail, &nerrs, nout); } /* Test 15: Compute relative error in svd */ if (rank > 0) { daxpy_(&mnmin, &c_b92, ©s[1], &c__1, &s[1] , &c__1); result[14] = dasum_(&mnmin, &s[1], &c__1) / dasum_(&mnmin, ©s[1], &c__1) / ( eps * (doublereal) mnmin); } else { result[14] = 0.; } /* Test 16: Compute error in solution */ dlacpy_("Full", &m, &nrhs, ©b[1], &ldb, &work[ 1], &ldwork); dqrt16_("No transpose", &m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, &work[1], &ldwork, & work[m * nrhs + 1], &result[15]); /* Test 17: Check norm of r'*A */ result[16] = 0.; if (m > crank) { result[16] = dqrt17_("No transpose", &c__1, & m, &n, &nrhs, ©a[1], &lda, &b[1], &ldb, ©b[1], &ldb, &c__[1], &work[ 1], &lwork); } /* Test 18: Check if x is in the rowspace of A */ result[17] = 0.; if (n > crank) { result[17] = dqrt14_("No transpose", &m, &n, & nrhs, ©a[1], &lda, &b[1], &ldb, & work[1], &lwork); } /* Print information about the tests that did not pass the threshold. 
*/ for (k = 7; k <= 18; ++k) { if (result[k - 1] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___42.ciunit = *nout; s_wsfe(&io___42); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&nrhs, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&nb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&itype, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&result[k - 1], ( ftnlen)sizeof(doublereal)); e_wsfe(); ++nfail; } /* L90: */ } nrun += 12; /* L100: */ } L110: ; } /* L120: */ } /* L130: */ } /* L140: */ } /* L150: */ } /* Print a summary of the results. */ alasvm_(path, nout, &nfail, &nrun, &nerrs); return 0; /* End of DDRVLS */ } /* ddrvls_ */