static void update(struct bfgs *opt, double f, const double *grad) { size_t i, n = opt->dim; double *H = opt->inv_hess; double *s = opt->step; double *y = opt->dg; /* update step */ memcpy(s, opt->search, n * sizeof(s[0])); blas_dscal(n, linesearch_step(&opt->ls), s, 1); /* update df */ opt->df = f - opt->f0; /* update dg */ memcpy(y, grad, n * sizeof(y[0])); blas_daxpy(n, -1.0, opt->grad0, 1, y, 1); double s_y = blas_ddot(n, s, 1, y, 1); /* NOTE: could use damped update instead (Nocedal and Wright, p. 537) */ assert(s_y > 0); /* initialize inv hessian on first step (Nocedal and Wright, p. 143) */ if (opt->first_step) { /* */ double y_y = blas_ddot(n, y, 1, y, 1); assert(y_y > 0); double scale = s_y / y_y; memset(H, 0, n * n * sizeof(H[0])); for (i = 0; i < n; i++) { H[i * n + i] = scale; } opt->first_step = 0; } /* compute H_y */ double *H_y = opt->H_dg; blas_gemv(n, n, BLAS_NOTRANS, 1.0, H, n, y, 1, 0.0, H_y, 1); double y_H_y = blas_ddot(n, H_y, 1, y, 1); double scale1 = (1.0 + (y_H_y / s_y)) / s_y; double rho = 1.0 / s_y; /* update inverse hessian */ blas_dger(n, n, scale1, s, 1, s, 1, H, n); blas_dger(n, n, -rho, H_y, 1, s, 1, H, n); blas_dger(n, n, -rho, s, 1, H_y, 1, H, n); /* update search direction */ blas_dgemv(BLAS_NOTRANS, n, n, -1.0, opt->inv_hess, n, grad, 1, 0.0, opt->search, 1); assert(isfinite(blas_dnrm2(n, opt->search, 1))); /* update initial position, value, and grad */ memcpy(opt->x0, opt->x, n * sizeof(opt->x0[0])); opt->f0 = f; memcpy(opt->grad0, grad, n * sizeof(opt->grad0[0])); }
enum bfgs_task bfgs_advance(struct bfgs *opt, double f, const double *grad) { size_t n = opt->dim; assert(isfinite(f)); assert(isfinite(blas_dnrm2(n, grad, 1))); assert(opt->task == BFGS_STEP); double g = blas_ddot(n, grad, 1, opt->search, 1); enum linesearch_task lstask = linesearch_advance(&opt->ls, f, g); int ok = linesearch_sdec(&opt->ls) && linesearch_curv(&opt->ls); switch (lstask) { case LINESEARCH_CONV: break; case LINESEARCH_STEP: opt->ls_it++; if (opt->ls_it < opt->ctrl.ls_maxit) { memcpy(opt->x, opt->x0, n * sizeof(opt->x[0])); blas_daxpy(n, linesearch_step(&opt->ls), opt->search, 1, opt->x, 1); assert(opt->task == BFGS_STEP); goto out; } else if (ok) { break; } else { opt->task = BFGS_ERR_LNSRCH; /* maximum number of iterations */ } default: if (ok) { break; } else { opt->task = BFGS_ERR_LNSRCH; goto out; } } update(opt, f, grad); /* test for convergence */ if (converged(opt)) { opt->task = BFGS_CONV; } else { assert(opt->task == BFGS_STEP); double step0 = 1.0; double f0 = f; double g0 = blas_ddot(n, grad, 1, opt->search, 1); assert(g0 < 0); linesearch(opt, step0, f0, g0); } out: return opt->task; }
void update_dist(struct mlogit *m) { if (!mlogit_dim(m)) return; /* if (uintset_count(&m->ind) >= mlogit_ncat(m) / 2) { recompute_dist(m); return; } */ const size_t dim = mlogit_dim(m); const double psi = catdist_psi(&m->dist_); const double *x = m->x; double *eta = m->work->w0; double *eta1 = m->work->dw; // copy old values of eta and compute new eta values const size_t *ind; size_t iz, nz; uintset_get_vals(&m->ind, &ind, &nz); for (iz = 0; iz < nz; iz++) { size_t i = ind[iz]; eta[iz] = catdist_eta(&m->dist_, i); eta1[iz] = blas_ddot(dim, m->beta, 1, x + i * dim, 1); } // update dist for (iz = 0; iz < nz; iz++) { size_t i = ind[iz]; catdist_set_eta(&m->dist_, i, eta1[iz]); } // compute new value of psi const double psi1 = catdist_psi(&m->dist_); m->work->dpsi = psi1 - psi; // eta (w0) := exp(eta - psi1); for (iz = 0; iz < nz; iz++) { eta[iz] = exp(eta[iz] - psi1); } // eta1 := exp(eta1 - psi1) for (iz = 0; iz < nz; iz++) { eta1[iz] = exp(eta1[iz] - psi1); } // eta1 (dw) := eta1 - eta (w0) blas_daxpy(nz, -1.0, eta, 1, eta1, 1); }
void spblas_dcscar(char trans, int n, int m, const double *A, const int *colptr, const int *rowind, int nnz, const double *x, int incx, double *h, int ldh, double *v, int ldv, int ite, double *rnorm, double tol, double *work, int *info) { int i, j, l; double beta, r; *rnorm = blas_dnrm2(nnz, A, 1); beta = blas_dnrm2(n, x, incx); for (i=0; i<m; i++) { blas_dfill(m, 0.0, &h[i*ldh], 1); blas_dfill(n, 0.0, &v[i*ldv], 1); } blas_daxpy(n, 1.0/beta, x, incx, &v[0], 1); for (j=0; j<m; j++) { spblas_dcscmv(trans, n, n, 1.0, A, colptr, rowind, nnz, &v[j*ldv], 1, 0.0, work, 1); for (i=0; i<=j; i++) { for (l=0; l<ite; l++) { r = blas_ddot(n, work, 1, &v[i*ldv], 1); h[i+j*ldh] += r; blas_daxpy(n, -r, &v[i*ldv], 1, work, 1); } } if (j != m-1) { h[j+1+j*ldh] = blas_dnrm2(n, work, 1); if (h[j+1+j*ldh] < *rnorm * tol) { *rnorm = beta; *info = j+1; return; } blas_daxpy(n, 1.0/h[j+1+j*ldh], work, 1, &v[(j+1)*ldv], 1); } } *rnorm = beta; *info = j; }
int mlogit_check(const struct mlogit *m) { int fail = 0; size_t n = mlogit_ncat(m); size_t p = mlogit_dim(m); const double *x = mlogit_x(m); const double *beta = mlogit_coefs(m); const double *mean = mlogit_mean(m); double cov_scale; const double *cov = mlogit_cov(m, &cov_scale); size_t i, j; double *eta0 = xcalloc(n, sizeof(*eta0)); double *prob0 = xcalloc(n, sizeof(*prob0)); double *mean0 = xcalloc(p, sizeof(*mean0)); double *cov0 = xcalloc(p * (p + 1) / 2, sizeof(*cov0)); double *cov0_copy = xcalloc(p * (p + 1) / 2, sizeof(*cov0_copy)); double *cov_err = xcalloc(p * (p + 1) / 2, sizeof(*cov_err)); double *z = xmalloc(p * p * sizeof(*z)); double *w = xmalloc(p * sizeof(*w)); double *diff = xcalloc(p, sizeof(*diff)); double *err_z = xmalloc(p * sizeof(*err_z)); const enum blas_uplo uplo = F77_COV_UPLO; double probtot0 = 0; size_t lwork, liwork; lwork = lapack_dspevd_lwork(LA_EIG_VEC, p, &liwork); double *work = xmalloc(lwork * sizeof(*work)); ptrdiff_t *iwork = xmalloc(liwork * sizeof(*iwork)); CHECK(!catdist_check(&m->dist_)); blas_dcopy(n, m->offset, 1, eta0, 1); if (p > 0) blas_dgemv(BLAS_TRANS, p, n, 1.0, x, p, beta, 1, 1.0, eta0, 1); for (i = 0; i < n; i++) { CHECK_APPROX(eta0[i], catdist_eta(&m->dist_, i)); } for (i = 0; i < n; i++) { prob0[i] = catdist_prob(&m->dist_, i); probtot0 += prob0[i]; } if (m->moments < 1) goto out; if (p > 0) blas_dgemv(BLAS_NOTRANS, p, n, 1.0, x, p, prob0, 1, 0.0, mean0, 1); for (j = 0; j < p; j++) { CHECK_APPROX(mean0[j], mean[j]); } if (m->moments < 2) goto out; for (i = 0; i < n; i++) { blas_dcopy(p, x + i * p, 1, diff, 1); blas_daxpy(p, -1.0, mean0, 1, diff, 1); blas_dspr(uplo, p, prob0[i], diff, 1, cov0); } blas_dscal(p * (p + 1) / 2, 1.0 / probtot0, cov0, 1); // cov_err := cov0 - (1/cov_scale) * cov blas_dcopy(p * (p + 1) / 2, cov0, 1, cov_err, 1); blas_daxpy(p * (p + 1) / 2, -1.0 / cov_scale, cov, 1, cov_err, 1); // compute cov0 eigendecomp blas_dcopy(p * (p + 1) / 2, cov0, 1, cov0_copy, 1); ptrdiff_t info = lapack_dspevd(LA_EIG_VEC, uplo, p, cov0_copy, w, z, MAX(1, p), work, lwork, iwork, liwork); assert(info == 0); (void)info; // check for relative equality of cov0 and (1/cov_scale) * cov // on the eigenspaces of cov0 for (i = p; i > 0; i--) { const double *zi = z + (i - 1) * p; blas_dspmv(uplo, p, 1.0, cov_err, zi, 1, 0.0, err_z, 1); double z_err_z = blas_ddot(p, zi, 1, err_z, 1); CHECK(fabs(z_err_z) <= 2 * p * SQRT_DBL_EPSILON * (1 + fabs(w[i - 1]))); } out: free(iwork); free(work); free(err_z); free(diff); free(w); free(z); free(cov_err); free(cov0_copy); free(cov0); free(mean0); free(prob0); free(eta0); return fail; }
LVAL xslpdqrdc(V) { LVAL x, a, j, w, r, q; double *dx, *da, *dw, *dr, *dq; int offx, ldx, n, p, *dj, job; x = xlgetarg(); offx = getfixnum(xlgafixnum()); ldx = getfixnum(xlgafixnum()); n = getfixnum(xlgafixnum()); p = getfixnum(xlgafixnum()); a = xlgetarg(); j = xlgetarg(); w = xlgetarg(); job = getfixnum(xlgafixnum()); r = moreargs() ? xlgetarg() : NIL; q = moreargs() ? xlgetarg() : NIL; xllastarg(); if (p > n && (! null(r) || ! null(q))) xlfail("more columns than rows"); checkldim(ldx, n); dx = getlinalgdvec(offx, ldx * p, x); da = getlinalgdvec(0, p, a); dj = job != 0 ? getlinalgivec(0, p, j) : NULL; dw = job != 0 ? getlinalgdvec(0, p, w) : NULL; dr = null(r) ? NULL : getlinalgdvec(0, p * p, r); dq = null(q) ? NULL : getlinalgdvec(0, n * p, q); linpack_dqrdc(dx, ldx, n, p, da, dj, dw, job); if (! null(r)) { int i, j, ip, jn; /* copy the upper triangle of X to R in row major order */ for (i = 0, ip = 0; i < p; i++, ip += p) { for (j = 0; j < i; j++) dr[ip + j] = 0.0; for (j = i, jn = j * n; j < p; j++, jn += n) dr[ip + j] = dx[jn + i]; } } if (! null(q)) { int i, j, ip, jn, jp; double t; /* copy X into Q in row major order */ for (i = 0, ip = 0; i < n; i++, ip += p) for (j = 0, jn = 0; j < p; j++, jn += n) dq[ip + j] = dx[jn + i]; /* accumulate the Q transformation */ for (i = 0, ip = 0; i < p; i++, ip += p) { dq[ip + i] = da[i]; for (j = i + 1; j < p; j++) dq[ip + j] = 0.0; } for (i = p - 1, ip = i * p; i >= 0; i--, ip -= p) { if (i == n - 1) dq[ip + i] = 1.0; else { for (j = i, jp = ip; j < n; j++, jp += p) dq[jp + i] = -dq[jp + i]; dq[ip + i] += 1.0; } for (j = i - 1, jp = ip - p; j >= 0; j--, jp -= p) { if (dq[jp + j] != 0.0) { t = -blas_ddot(n - j, dq + jp + j, p, dq + jp + i, p) / dq[jp + j]; blas_daxpy(n - j, t, dq + jp + j, p, dq + jp + i, p); } } } } return NIL; }