/** * @brief Creates the penalty matrix D tilde of order k. * Returns the matrix Dk premultipied by a diagonal * matrix of weights. * * @param n number of observations * @param k order of the trendfilter * @param x locations of the responses * @return pointer to a csparse matrix * @see tf_calc_dktil */ cs * tf_calc_dktil (int n, int k, const double * x) { cs * delta_k; cs * delta_k_cp; cs * Dk; cs * Dktil; int i; Dk = tf_calc_dk(n, k, x); /* Deal with k=0 separately */ if(k == 0) return Dk; /* Construct diagonal matrix of differences: */ delta_k = cs_spalloc(n-k, n-k, (n-k), 1, 1); for(i = 0; i < n - k; i++) { delta_k->p[i] = i; delta_k->i[i] = i; delta_k->x[i] = k / (x[k + i] - x[i]); } delta_k->nz = n-k; delta_k_cp = cs_compress(delta_k); Dktil = cs_multiply(delta_k_cp, Dk); cs_spfree(Dk); cs_spfree(delta_k); cs_spfree(delta_k_cp); return Dktil; }
cs *cs_rinvwishart(const cs *A, double nu, const css *As){ int m, i, j, cnt; cs *T, *IW, *C, *W, *tC; csn *U; m = A->n; T = cs_spalloc (m, m, m*(m+1)/2, 1, 0) ; if (!T ) return (cs_done (T, NULL, NULL, 0)); double df = nu; cnt = 0; for(i = 0; i<m; i++){ T->p[i] = cnt; T->i[cnt] = i; T->x[cnt] = sqrt(rchisq(df)); cnt++; for(j = i+1; j<m; j++){ T->i[cnt] = j; T->x[cnt] = rnorm(0.0,1.0); cnt++; } df--; } T->p[m] = m*(m+1)/2; U = cs_chol(A, As); if(U==NULL){ PutRNGstate(); error("ill-conditioned cross-product: can't form Cholesky factor\n"); } C = cs_multiply(U->L,T); // t(T)%*%chol(A) tC = cs_transpose(C, TRUE); // t(CI) W = cs_multiply(C,tC); IW = cs_inv(W); // crossprod(t(CI)) cs_spfree(T); cs_nfree(U); cs_spfree(C); cs_spfree(tC); cs_spfree(W); return (cs_done (IW, NULL, NULL, 1)) ; /* success; free workspace, return C */ }
cs* cs_sorted_multiply2(const cs* a, const cs* b) { cs* D = cs_multiply(a,b); cs* E = cs_transpose(D,1); cs_spfree(D); cs* C = cs_transpose(E,1); cs_spfree(E); return C; }
cs* cs_sorted_multiply(const cs* a, const cs* b) { cs* A = cs_transpose (a, 1) ; cs* B = cs_transpose (b, 1) ; cs* D = cs_multiply (B,A) ; /* D = B'*A' */ cs_spfree (A) ; cs_spfree (B) ; cs_dropzeros (D) ; /* drop zeros from D */ cs* C = cs_transpose (D, 1) ; /* C = D', so that C is sorted */ cs_spfree (D) ; return C; }
void NM_gemm(const double alpha, NumericsMatrix* A, NumericsMatrix* B, const double beta, NumericsMatrix* C) { switch(A->storageType) { case NM_DENSE: { cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, A->size0, B->size1, B->size1, alpha, A->matrix0, A->size0, B->matrix0, B->size0, beta, C->matrix0, A->size0); NM_clearSparseBlock(C); NM_clearSparseStorage(C); break; } case NM_SPARSE_BLOCK: { prodNumericsMatrixNumericsMatrix(alpha, A, B, beta, C); NM_clearDense(C); NM_clearSparseStorage(C); break; } case NM_SPARSE: { CSparseMatrix* result = cs_add(cs_multiply(NM_csc(A), NM_csc(B)), NM_csc(C), alpha, beta); NM_clearDense(C); NM_clearSparseBlock(C); NM_clearSparseStorage(C); NM_sparse(C)->csc = result; C->size0 = (int)C->matrix2->csc->m; C->size1 = (int)C->matrix2->csc->n; break; } } }
/* p = amd(A+A') if symmetric is true, or amd(A'A) otherwise */ CS_INT *cs_amd (CS_INT order, const cs *A) /* order 0:natural, 1:Chol, 2:LU, 3:QR */ { cs *C, *A2, *AT ; CS_INT *Cp, *Ci, *last, *W, *len, *nv, *next, *P, *head, *elen, *degree, *w, *hhead, *ATp, *ATi, d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1, k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi, ok, cnz, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, n, m, t ; unsigned CS_INT h ; /* --- Construct matrix C ----------------------------------------------- */ if (!CS_CSC (A) || order <= 0 || order > 3) return (NULL) ; /* check */ AT = cs_transpose (A, 0) ; /* compute A' */ if (!AT) return (NULL) ; m = A->m ; n = A->n ; dense = CS_MAX (16, 10 * sqrt ((double) n)) ; /* find dense threshold */ dense = CS_MIN (n-2, dense) ; if (order == 1 && n == m) { C = cs_add (A, AT, 0, 0) ; /* C = A+A' */ } else if (order == 2) { ATp = AT->p ; /* drop dense columns from AT */ ATi = AT->i ; for (p2 = 0, j = 0 ; j < m ; j++) { p = ATp [j] ; /* column j of AT starts here */ ATp [j] = p2 ; /* new column j starts here */ if (ATp [j+1] - p > dense) continue ; /* skip dense col j */ for ( ; p < ATp [j+1] ; p++) ATi [p2++] = ATi [p] ; } ATp [m] = p2 ; /* finalize AT */ A2 = cs_transpose (AT, 0) ; /* A2 = AT' */ C = A2 ? cs_multiply (AT, A2) : NULL ; /* C=A'*A with no dense rows */ cs_spfree (A2) ; } else { C = cs_multiply (AT, A) ; /* C=A'*A */ } cs_spfree (AT) ; if (!C) return (NULL) ; cs_fkeep (C, &cs_diag, NULL) ; /* drop diagonal entries */ Cp = C->p ; cnz = Cp [n] ; P = cs_malloc (n+1, sizeof (CS_INT)) ; /* allocate result */ W = cs_malloc (8*(n+1), sizeof (CS_INT)) ; /* get workspace */ t = cnz + cnz/5 + 2*n ; /* add elbow room to C */ if (!P || !W || !cs_sprealloc (C, t)) return (cs_idone (P, C, W, 0)) ; len = W ; nv = W + (n+1) ; next = W + 2*(n+1) ; head = W + 3*(n+1) ; elen = W + 4*(n+1) ; degree = W + 5*(n+1) ; w = W + 6*(n+1) ; hhead = W + 7*(n+1) ; last = P ; /* use P as workspace for last */ /* --- Initialize quotient graph ---------------------------------------- */ for (k = 0 ; k < n ; k++) len [k] = Cp [k+1] - Cp [k] ; len [n] = 0 ; nzmax = C->nzmax ; Ci = C->i ; for (i = 0 ; i <= n ; i++) { head [i] = -1 ; /* degree list i is empty */ last [i] = -1 ; next [i] = -1 ; hhead [i] = -1 ; /* hash list i is empty */ nv [i] = 1 ; /* node i is just one node */ w [i] = 1 ; /* node i is alive */ elen [i] = 0 ; /* Ek of node i is empty */ degree [i] = len [i] ; /* degree of node i */ } mark = cs_wclear (0, 0, w, n) ; /* clear w */ elen [n] = -2 ; /* n is a dead element */ Cp [n] = -1 ; /* n is a root of assembly tree */ w [n] = 0 ; /* n is a dead element */ /* --- Initialize degree lists ------------------------------------------ */ for (i = 0 ; i < n ; i++) { d = degree [i] ; if (d == 0) /* node i is empty */ { elen [i] = -2 ; /* element i is dead */ nel++ ; Cp [i] = -1 ; /* i is a root of assembly tree */ w [i] = 0 ; } else if (d > dense) /* node i is dense */ { nv [i] = 0 ; /* absorb i into element n */ elen [i] = -1 ; /* node i is dead */ nel++ ; Cp [i] = CS_FLIP (n) ; nv [n]++ ; } else { if (head [d] != -1) last [head [d]] = i ; next [i] = head [d] ; /* put node i in degree list d */ head [d] = i ; } } while (nel < n) /* while (selecting pivots) do */ { /* --- Select node of minimum approximate degree -------------------- */ for (k = -1 ; mindeg < n && (k = head [mindeg]) == -1 ; mindeg++) ; if (next [k] != -1) last [next [k]] = -1 ; head [mindeg] = next [k] ; /* remove k from degree list */ elenk = elen [k] ; /* elenk = |Ek| */ nvk = nv [k] ; /* # of nodes k represents */ nel += nvk ; /* nv[k] nodes of A eliminated */ /* --- Garbage collection ------------------------------------------- */ if (elenk > 0 && cnz + mindeg >= nzmax) { for (j = 0 ; j < n ; j++) { if ((p = Cp [j]) >= 0) /* j is a live node or element */ { Cp [j] = Ci [p] ; /* save first entry of object */ Ci [p] = CS_FLIP (j) ; /* first entry is now CS_FLIP(j) */ } } for (q = 0, p = 0 ; p < cnz ; ) /* scan all of memory */ { if ((j = CS_FLIP (Ci [p++])) >= 0) /* found object j */ { Ci [q] = Cp [j] ; /* restore first entry of object */ Cp [j] = q++ ; /* new pointer to object j */ for (k3 = 0 ; k3 < len [j]-1 ; k3++) Ci [q++] = Ci [p++] ; } } cnz = q ; /* Ci [cnz...nzmax-1] now free */ } /* --- Construct new element ---------------------------------------- */ dk = 0 ; nv [k] = -nvk ; /* flag k as in Lk */ p = Cp [k] ; pk1 = (elenk == 0) ? p : cnz ; /* do in place if elen[k] == 0 */ pk2 = pk1 ; for (k1 = 1 ; k1 <= elenk + 1 ; k1++) { if (k1 > elenk) { e = k ; /* search the nodes in k */ pj = p ; /* list of nodes starts at Ci[pj]*/ ln = len [k] - elenk ; /* length of list of nodes in k */ } else { e = Ci [p++] ; /* search the nodes in e */ pj = Cp [e] ; ln = len [e] ; /* length of list of nodes in e */ } for (k2 = 1 ; k2 <= ln ; k2++) { i = Ci [pj++] ; if ((nvi = nv [i]) <= 0) continue ; /* node i dead, or seen */ dk += nvi ; /* degree[Lk] += size of node i */ nv [i] = -nvi ; /* negate nv[i] to denote i in Lk*/ Ci [pk2++] = i ; /* place i in Lk */ if (next [i] != -1) last [next [i]] = last [i] ; if (last [i] != -1) /* remove i from degree list */ { next [last [i]] = next [i] ; } else { head [degree [i]] = next [i] ; } } if (e != k) { Cp [e] = CS_FLIP (k) ; /* absorb e into k */ w [e] = 0 ; /* e is now a dead element */ } } if (elenk != 0) cnz = pk2 ; /* Ci [cnz...nzmax] is free */ degree [k] = dk ; /* external degree of k - |Lk\i| */ Cp [k] = pk1 ; /* element k is in Ci[pk1..pk2-1] */ len [k] = pk2 - pk1 ; elen [k] = -2 ; /* k is now an element */ /* --- Find set differences ----------------------------------------- */ mark = cs_wclear (mark, lemax, w, n) ; /* clear w if necessary */ for (pk = pk1 ; pk < pk2 ; pk++) /* scan 1: find |Le\Lk| */ { i = Ci [pk] ; if ((eln = elen [i]) <= 0) continue ;/* skip if elen[i] empty */ nvi = -nv [i] ; /* nv [i] was negated */ wnvi = mark - nvi ; for (p = Cp [i] ; p <= Cp [i] + eln - 1 ; p++) /* scan Ei */ { e = Ci [p] ; if (w [e] >= mark) { w [e] -= nvi ; /* decrement |Le\Lk| */ } else if (w [e] != 0) /* ensure e is a live element */ { w [e] = degree [e] + wnvi ; /* 1st time e seen in scan 1 */ } } } /* --- Degree update ------------------------------------------------ */ for (pk = pk1 ; pk < pk2 ; pk++) /* scan2: degree update */ { i = Ci [pk] ; /* consider node i in Lk */ p1 = Cp [i] ; p2 = p1 + elen [i] - 1 ; pn = p1 ; for (h = 0, d = 0, p = p1 ; p <= p2 ; p++) /* scan Ei */ { e = Ci [p] ; if (w [e] != 0) /* e is an unabsorbed element */ { dext = w [e] - mark ; /* dext = |Le\Lk| */ if (dext > 0) { d += dext ; /* sum up the set differences */ Ci [pn++] = e ; /* keep e in Ei */ h += e ; /* compute the hash of node i */ } else { Cp [e] = CS_FLIP (k) ; /* aggressive absorb. e->k */ w [e] = 0 ; /* e is a dead element */ } } } elen [i] = pn - p1 + 1 ; /* elen[i] = |Ei| */ p3 = pn ; p4 = p1 + len [i] ; for (p = p2 + 1 ; p < p4 ; p++) /* prune edges in Ai */ { j = Ci [p] ; if ((nvj = nv [j]) <= 0) continue ; /* node j dead or in Lk */ d += nvj ; /* degree(i) += |j| */ Ci [pn++] = j ; /* place j in node list of i */ h += j ; /* compute hash for node i */ } if (d == 0) /* check for mass elimination */ { Cp [i] = CS_FLIP (k) ; /* absorb i into k */ nvi = -nv [i] ; dk -= nvi ; /* |Lk| -= |i| */ nvk += nvi ; /* |k| += nv[i] */ nel += nvi ; nv [i] = 0 ; elen [i] = -1 ; /* node i is dead */ } else { degree [i] = CS_MIN (degree [i], d) ; /* update degree(i) */ Ci [pn] = Ci [p3] ; /* move first node to end */ Ci [p3] = Ci [p1] ; /* move 1st el. to end of Ei */ Ci [p1] = k ; /* add k as 1st element in of Ei */ len [i] = pn - p1 + 1 ; /* new len of adj. list of node i */ h %= n ; /* finalize hash of i */ next [i] = hhead [h] ; /* place i in hash bucket */ hhead [h] = i ; last [i] = h ; /* save hash of i in last[i] */ } } /* scan2 is done */ degree [k] = dk ; /* finalize |Lk| */ lemax = CS_MAX (lemax, dk) ; mark = cs_wclear (mark+lemax, lemax, w, n) ; /* clear w */ /* --- Supernode detection ------------------------------------------ */ for (pk = pk1 ; pk < pk2 ; pk++) { i = Ci [pk] ; if (nv [i] >= 0) continue ; /* skip if i is dead */ h = last [i] ; /* scan hash bucket of node i */ i = hhead [h] ; hhead [h] = -1 ; /* hash bucket will be empty */ for ( ; i != -1 && next [i] != -1 ; i = next [i], mark++) { ln = len [i] ; eln = elen [i] ; for (p = Cp [i]+1 ; p <= Cp [i] + ln-1 ; p++) w [Ci [p]] = mark; jlast = i ; for (j = next [i] ; j != -1 ; ) /* compare i with all j */ { ok = (len [j] == ln) && (elen [j] == eln) ; for (p = Cp [j] + 1 ; ok && p <= Cp [j] + ln - 1 ; p++) { if (w [Ci [p]] != mark) ok = 0 ; /* compare i and j*/ } if (ok) /* i and j are identical */ { Cp [j] = CS_FLIP (i) ; /* absorb j into i */ nv [i] += nv [j] ; nv [j] = 0 ; elen [j] = -1 ; /* node j is dead */ j = next [j] ; /* delete j from hash bucket */ next [jlast] = j ; } else { jlast = j ; /* j and i are different */ j = next [j] ; } } } } /* --- Finalize new element------------------------------------------ */ for (p = pk1, pk = pk1 ; pk < pk2 ; pk++) /* finalize Lk */ { i = Ci [pk] ; if ((nvi = -nv [i]) <= 0) continue ;/* skip if i is dead */ nv [i] = nvi ; /* restore nv[i] */ d = degree [i] + dk - nvi ; /* compute external degree(i) */ d = CS_MIN (d, n - nel - nvi) ; if (head [d] != -1) last [head [d]] = i ; next [i] = head [d] ; /* put i back in degree list */ last [i] = -1 ; head [d] = i ; mindeg = CS_MIN (mindeg, d) ; /* find new minimum degree */ degree [i] = d ; Ci [p++] = i ; /* place i in Lk */ } nv [k] = nvk ; /* # nodes absorbed into k */ if ((len [k] = p-pk1) == 0) /* length of adj list of element k*/ { Cp [k] = -1 ; /* k is a root of the tree */ w [k] = 0 ; /* k is now a dead element */ } if (elenk != 0) cnz = p ; /* free unused space in Lk */ } /* --- Postordering ----------------------------------------------------- */ for (i = 0 ; i < n ; i++) Cp [i] = CS_FLIP (Cp [i]) ;/* fix assembly tree */ for (j = 0 ; j <= n ; j++) head [j] = -1 ; for (j = n ; j >= 0 ; j--) /* place unordered nodes in lists */ { if (nv [j] > 0) continue ; /* skip if j is an element */ next [j] = head [Cp [j]] ; /* place j in list of its parent */ head [Cp [j]] = j ; } for (e = n ; e >= 0 ; e--) /* place elements in lists */ { if (nv [e] <= 0) continue ; /* skip unless e is an element */ if (Cp [e] != -1) { next [e] = head [Cp [e]] ; /* place e in list of its parent */ head [Cp [e]] = e ; } } for (k = 0, i = 0 ; i <= n ; i++) /* postorder the assembly tree */ { if (Cp [i] == -1) k = cs_tdfs (i, k, head, next, P, w) ; } return (cs_idone (P, C, W, 1)) ; }
/* Cholesky update/downdate */ int demo3 (problem *Prob) { cs *A, *C, *W = NULL, *WW, *WT, *E = NULL, *W2 ; int n, k, *Li, *Lp, *Wi, *Wp, p1, p2, *p = NULL, ok ; double *b, *x, *resid, *y = NULL, *Lx, *Wx, s, t, t1 ; css *S = NULL ; csn *N = NULL ; if (!Prob || !Prob->sym || Prob->A->n == 0) return (0) ; A = Prob->A ; C = Prob->C ; b = Prob->b ; x = Prob->x ; resid = Prob->resid; n = A->n ; if (!Prob->sym || n == 0) return (1) ; rhs (x, b, n) ; /* compute right-hand side */ printf ("\nchol then update/downdate ") ; print_order (1) ; y = cs_malloc (n, sizeof (double)) ; t = tic () ; S = cs_schol (1, C) ; /* symbolic Chol, amd(A+A') */ printf ("\nsymbolic chol time %8.2f\n", toc (t)) ; t = tic () ; N = cs_chol (C, S) ; /* numeric Cholesky */ printf ("numeric chol time %8.2f\n", toc (t)) ; if (!S || !N || !y) return (done3 (0, S, N, y, W, E, p)) ; t = tic () ; cs_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_lsolve (N->L, y) ; /* y = L\y */ cs_ltsolve (N->L, y) ; /* y = L'\y */ cs_pvec (S->pinv, y, x, n) ; /* x = P'*y */ printf ("solve chol time %8.2f\n", toc (t)) ; printf ("original: ") ; print_resid (1, C, x, b, resid) ; /* print residual */ k = n/2 ; /* construct W */ W = cs_spalloc (n, 1, n, 1, 0) ; if (!W) return (done3 (0, S, N, y, W, E, p)) ; Lp = N->L->p ; Li = N->L->i ; Lx = N->L->x ; Wp = W->p ; Wi = W->i ; Wx = W->x ; Wp [0] = 0 ; p1 = Lp [k] ; Wp [1] = Lp [k+1] - p1 ; s = Lx [p1] ; srand (1) ; for ( ; p1 < Lp [k+1] ; p1++) { p2 = p1 - Lp [k] ; Wi [p2] = Li [p1] ; Wx [p2] = s * rand () / ((double) RAND_MAX) ; } t = tic () ; ok = cs_updown (N->L, +1, W, S->parent) ; /* update: L*L'+W*W' */ t1 = toc (t) ; printf ("update: time: %8.2f\n", t1) ; if (!ok) return (done3 (0, S, N, y, W, E, p)) ; t = tic () ; cs_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_lsolve (N->L, y) ; /* y = L\y */ cs_ltsolve (N->L, y) ; /* y = L'\y */ cs_pvec (S->pinv, y, x, n) ; /* x = P'*y */ t = toc (t) ; p = cs_pinv (S->pinv, n) ; W2 = cs_permute (W, p, NULL, 1) ; /* E = C + (P'W)*(P'W)' */ WT = cs_transpose (W2,1) ; WW = cs_multiply (W2, WT) ; cs_spfree (WT) ; cs_spfree (W2) ; E = cs_add (C, WW, 1, 1) ; cs_spfree (WW) ; if (!E || !p) return (done3 (0, S, N, y, W, E, p)) ; printf ("update: time: %8.2f (incl solve) ", t1+t) ; print_resid (1, E, x, b, resid) ; /* print residual */ cs_nfree (N) ; /* clear N */ t = tic () ; N = cs_chol (E, S) ; /* numeric Cholesky */ if (!N) return (done3 (0, S, N, y, W, E, p)) ; cs_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_lsolve (N->L, y) ; /* y = L\y */ cs_ltsolve (N->L, y) ; /* y = L'\y */ cs_pvec (S->pinv, y, x, n) ; /* x = P'*y */ t = toc (t) ; printf ("rechol: time: %8.2f (incl solve) ", t) ; print_resid (1, E, x, b, resid) ; /* print residual */ t = tic () ; ok = cs_updown (N->L, -1, W, S->parent) ; /* downdate: L*L'-W*W' */ t1 = toc (t) ; if (!ok) return (done3 (0, S, N, y, W, E, p)) ; printf ("downdate: time: %8.2f\n", t1) ; t = tic () ; cs_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_lsolve (N->L, y) ; /* y = L\y */ cs_ltsolve (N->L, y) ; /* y = L'\y */ cs_pvec (S->pinv, y, x, n) ; /* x = P'*y */ t = toc (t) ; printf ("downdate: time: %8.2f (incl solve) ", t1+t) ; print_resid (1, C, x, b, resid) ; /* print residual */ return (done3 (1, S, N, y, W, E, p)) ; }
/** * @brief Creates the penalty matrix of order k. * Returns the matrix Dk as a suite sparse style matrix. * * @param n number of observations * @param k order of the trendfilter * @param x locations of the responses * @return pointer to a csparse matrix * @see tf_calc_dktil */ cs * tf_calc_dk (int n, int k, const double * x) { long int i; int tk = 1; /* "this k" - will iterate until ts = k */ cs * D1; cs * D1_cp; cs * Dk; cs * Dk_cp; cs * delta_k; cs * delta_k_cp; cs * D1_x_delta; cs * Dk_next; cs * T; cs * eye; /* Deal with k=0 separately */ if(k == 0) { T = cs_spalloc (n, n, n, 1, 1) ; for (i = 0 ; i < n; i++) cs_entry (T, i, i, 1); eye = cs_compress (T); cs_spfree (T); return eye; } /* Contruct one 'full D1', which persists throughout and another copy as Dk */ D1 = cs_spalloc(n-tk, n, (n-tk)*2, 1, 1); Dk = cs_spalloc(n-tk, n, (n-tk)*2, 1, 1); D1->nz = (n-tk)*2; Dk->nz = (n-tk)*2; for (i = 0; i < (n-tk)*2; i++) { D1->p[i] = (i+1) / 2; Dk->p[i] = D1->p[i]; D1->i[i] = i / 2; Dk->i[i] = D1->i[i]; D1->x[i] = -1 + 2*(i % 2); Dk->x[i] = D1->x[i]; } /* Create a column compressed version of Dk, and delete the old copy */ Dk_cp = cs_compress(Dk); cs_spfree(Dk); for (tk = 1; tk < k; tk++) { /* 'reduce' the virtual size of D1 to: (n-tk-1) x (n-tk), compress into compressed column, saving as D1_cp */ D1->nz = (n-tk-1)*2; D1->m = n-tk-1; D1->n = n-tk; D1_cp = cs_compress(D1); /* Construct diagonal matrix of differences: */ delta_k = cs_spalloc(n-tk, n-tk, (n-tk), 1, 1); for(i = 0; i < n - tk; i++) { delta_k->p[i] = i; delta_k->i[i] = i; delta_k->x[i] = tk / (x[tk + i] - x[i]); } delta_k->nz = n-tk; delta_k_cp = cs_compress(delta_k); D1_x_delta = cs_multiply(D1_cp, delta_k_cp); /* Execute the matrix multiplication */ Dk_next = cs_multiply(D1_x_delta, Dk_cp); /* Free temporary cs matricies created in each loop */ cs_spfree(D1_cp); cs_spfree(delta_k); cs_spfree(delta_k_cp); cs_spfree(D1_x_delta); cs_spfree(Dk_cp); Dk_cp = Dk_next; } cs_spfree(D1); return Dk_cp; }
/** * @brief Main wrapper for fitting a trendfilter model. * Takes as input either a sequence of lambda tuning parameters, or the number * of desired lambda values. In the latter case the function will also calculate * a lambda sequence. The user must supply allocated memory to store the output, * with the function itself returning only @c void. For default values, and an * example of how to call the function, see the function tf_admm_default. * * @param y a vector of responses * @param x a vector of response locations; must be in increasing order * @param w a vector of sample weights * @param n the length of y, x, and w * @param k degree of the trendfilter; i.e., k=1 linear * @param family family code for the type of fit; family=0 for OLS * @param max_iter maximum number of ADMM interations; ignored for k=0 * @param lam_flag 0/1 flag for whether lambda sequence needs to be estimated * @param lambda either a sequence of lambda when lam_flag=0, or empty * allocated space if lam_flag=1 * @param nlambda number of lambda values; need for both lam_flag=0 and 1 * @param lambda_min_ratio minimum ratio between min and max lambda; ignored for lam_flag=0 * @param beta allocated space of size n*nlambda to store the output coefficents * @param obj allocated space of size max_iter*nlambda to store the objective * @param iter allocated space of size nlambda to store the number of iterations * @param status allocated space of size nlambda to store the status of each run * @param rho tuning parameter for the ADMM algorithm * @param obj_tol stopping criteria tolerance * @param alpha_ls for family != 0, line search tuning parameter * @param gamma_ls for family != 0, line search tuning parameter * @param max_iter_ls for family != 0, max number of iterations in line search * @param max_iter_newton for family != 0, max number of iterations in inner ADMM * @param verbose 0/1 flag for printing progress * @return void * @see tf_admm_default */ void tf_admm (double * y, double * x, double * w, int n, int k, int family, int max_iter, int lam_flag, double * lambda, int nlambda, double lambda_min_ratio, double * beta, double * obj, int * iter, int * status, double rho, double obj_tol, double alpha_ls, double gamma_ls, int max_iter_ls, int max_iter_newton, int verbose) { int i; int j; double max_lam; double min_lam; double * temp_n; double * beta_max; double * alpha; double * u; cs * D; cs * Dt; cs * Dk; cs * Dkt; cs * DktDk; gqr * Dt_qr; gqr * Dkt_qr; beta_max = (double *) malloc(n * sizeof(double)); temp_n = (double *) malloc(n * sizeof(double)); alpha = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */ u = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */ /* Assume w does not have zeros */ for(i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]); D = tf_calc_dk(n, k+1, x); Dk = tf_calc_dktil(n, k, x); Dt = cs_transpose(D, 1); diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */ Dkt = cs_transpose(Dk, 1); Dt_qr = glmgen_qr(Dt); Dkt_qr = glmgen_qr(Dkt); DktDk = cs_multiply(Dkt,Dk); /* Determine the maximum lambda in the path, and initiate the path if needed * using the input lambda_min_ratio and equally spaced log points. */ max_lam = tf_maxlam(n, y, Dt_qr, w); if (!lam_flag) { min_lam = max_lam * lambda_min_ratio; lambda[0] = max_lam; for (i = 1; i < nlambda; i++) lambda[i] = exp((log(max_lam) * (nlambda - i -1) + log(min_lam) * i) / (nlambda-1)); } rho = rho * pow( (x[n-1] - x[0])/n, (double)k); /* Initiate alpha and u for a warm start */ if (lambda[0] < max_lam * 1e-5) { for (i = 0; i < n - k; i++) { alpha[i] = 0; u[i] = 0; } } else { /* beta_max */ for (i = 0; i < n; i++) temp_n[i] = -sqrt(w[i]) * y[i]; glmgen_qrsol (Dt_qr, temp_n); for (i = 0; i < n; i++) beta_max[i] = 0; cs_gaxpy(Dt, temp_n, beta_max); /* Dt has a W^{-1/2}, so in the next step divide by sqrt(w) instead of w. */ for (i = 0; i < n; i++) beta_max[i] = y[i] - beta_max[i]/sqrt(w[i]); /* alpha_max */ tf_dxtil(x, n, k, beta_max, alpha); /* u_max */ switch (family) { case FAMILY_GAUSSIAN: for (i = 0; i < n; i++) u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]); break; case FAMILY_LOGISTIC: for (i = 0; i < n; i++) { u[i] = logi_b2(beta_max[i]) * w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]); } break; case FAMILY_POISSON: for (i = 0; i < n; i++) { u[i] = pois_b2(beta_max[i]) * w[i] *(beta_max[i] - y[i]) / (rho * lambda[0]); } break; default: for (i = 0; i < nlambda; i++) status[i] = 2; return; } glmgen_qrsol (Dkt_qr, u); } /* Iterate lower level functions over all lambda values; * the alpha and u vectors get used each time of subsequent * warm starts */ for (i = 0; i < nlambda; i++) { /* warm start */ double * beta_init = (i == 0) ? beta_max : beta + (i-1)*n; for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j]; switch (family) { case FAMILY_GAUSSIAN: tf_admm_gauss(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol, DktDk, verbose); break; case FAMILY_LOGISTIC: tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton, DktDk, &logi_b, &logi_b1, &logi_b2, verbose); break; case FAMILY_POISSON: tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton, DktDk, &pois_b, &pois_b1, &pois_b2, verbose); break; } /* If there any NaNs in beta: reset beta, alpha, u */ if(has_nan(beta + i * n, n)) { for(j = 0; j < n; j++) beta[i*n + j] = 0; for(j = 0; j < n-k; j++) { alpha[j] = 0; u[j] = 0; } status[i] = 1; printf("Numerical error in lambda[%d]=%f",i,lambda[i]); } } cs_spfree(D); cs_spfree(Dt); cs_spfree(Dk); cs_spfree(Dkt); cs_spfree(DktDk); glmgen_gqr_free(Dt_qr); glmgen_gqr_free(Dkt_qr); free(temp_n); free(beta_max); free(alpha); free(u); }
/** * @brief Main wrapper for fitting a trendfilter model. * Takes as input either a sequence of lambda tuning parameters, or the number * of desired lambda values. In the latter case the function will also calculate * a lambda sequence. The user must supply allocated memory to store the output, * with the function itself returning only @c void. For default values, and an * example of how to call the function, see the function tf_admm_default. * * @param x a vector of data locations; must be in increasing order * @param y a vector of responses * @param w a vector of sample weights * @param n the length of x, y, and w * @param k polynomial degree of the fitted trend; i.e., k=1 for linear * @param family family code for the type of fit; family=0 for OLS * @param max_iter maximum number of ADMM interations; ignored for k=0 * @param beta0 initialization value of beta for first lambda; ignored if NULL * @param lam_flag 0/1 flag for whether lambda sequence needs to be estimated * @param lambda either a sequence of lambda when lam_flag=0, or empty * allocated space if lam_flag=1 * @param nlambda number of lambda values; need for both lam_flag=0 and 1 * @param lambda_min_ratio minimum ratio between min and max lambda; ignored for lam_flag=0 * @param df allocated space of nlambda to store the output df values * @param beta allocated space of size n*nlambda to store the output coefficents * @param obj allocated space of size max_iter*nlambda to store the objective * @param iter allocated space of size nlambda to store the number of iterations * @param status allocated space of size nlambda to store the status of each run * @param rho tuning parameter for the ADMM algorithm * @param obj_tol stopping criteria tolerance * @param obj_tol_newton for family != 0, stopping criteria tolerance for prox Newton * @param alpha_ls for family != 0, line search tuning parameter * @param gamma_ls for family != 0, line search tuning parameter * @param max_iter_ls for family != 0, max number of iterations in line search * @param max_iter_newton for family != 0, max number of iterations in inner ADMM * @param verbose 0/1 flag for printing progress * @return void * @see tf_admm_default */ void tf_admm ( double * x, double * y, double * w, int n, int k, int family, int max_iter, double * beta0, int lam_flag, double * lambda, int nlambda, double lambda_min_ratio, int tridiag, int * df, double * beta, double * obj, int * iter, int * status, double rho, double obj_tol, double obj_tol_newton, double alpha_ls, double gamma_ls, int max_iter_ls, int max_iter_newton, int verbose) { int i; int j; int numDualVars; double max_lam; double min_lam; double * temp_n; double * beta_max; double * alpha; double * u; double * A0; double * A1; double * v; cs * D; cs * Dt; cs * Dk; cs * Dkt; cs * DktDk; gqr * Dt_qr; gqr * Dkt_qr; beta_max = (double *) malloc(n * sizeof(double)); temp_n = (double *) malloc(n * sizeof(double)); v = (double *) malloc(n * sizeof(double)); numDualVars = tridiag ? k : 1; /* we use extra buffer below (n vs n-k) */ alpha = (double *) malloc(n * numDualVars * sizeof(double)); u = (double *) malloc(n * numDualVars * sizeof(double)); /* Assume w does not have zeros */ for (i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]); D = tf_calc_dk(n, k+1, x); Dk = tf_calc_dktil(n, k, x); Dt = cs_transpose(D, 1); diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */ Dkt = cs_transpose(Dk, 1); Dt_qr = glmgen_qr(Dt); Dkt_qr = glmgen_qr(Dkt); DktDk = cs_multiply(Dkt,Dk); /* Determine the maximum lambda in the path */ max_lam = tf_maxlam(n, y, Dt_qr, w); /* and if it is too small, return a trivial solution for Gaussian case */ if (family == FAMILY_GAUSSIAN) { if (max_lam < 1e-12) { for (i=0; i<nlambda; i++) { for (j=0; j<n; j++) beta[i*n+j] = y[j]; obj[i*(max_iter+1)] = 0; df[i] = n; } cs_spfree(D); cs_spfree(Dt); cs_spfree(Dk); cs_spfree(Dkt); cs_spfree(DktDk); glmgen_gqr_free(Dt_qr); glmgen_gqr_free(Dkt_qr); free(temp_n); free(beta_max); free(alpha); free(u); return; } } else { max_lam += 1; } /* Initiate the path if needed using the input lambda_min_ratio and * equally spaced points in log space. */ if (!lam_flag) seq_logspace(max_lam,lambda_min_ratio,nlambda,lambda); /* Augmented Lagrangian parameter */ rho = rho * pow((x[n-1] - x[0])/(double)(n-1), (double)k); /* Initiate alpha and u for a warm start */ if (lambda[0] < max_lam * 1e-5) for (i = 0; i < n - k; i++) alpha[i] = u[i] = 0; else { /* beta_max */ if (beta0 == NULL) calc_beta_max(y,w,n,Dt_qr,Dt,temp_n,beta_max); else memcpy(beta_max, beta0, n*sizeof(double)); /* Check if beta = weighted mean(y) is better than beta */ double yc = weighted_mean(y,w,n); for (i = 0; i < n; i++) temp_n[i] = yc; double obj1 = tf_obj(x,y,w,n,k,max_lam,family,beta_max,v); double obj2 = tf_obj(x,y,w,n,k,max_lam,family,temp_n,v); if(obj2 < obj1) memcpy(beta_max, temp_n, n*sizeof(double)); /* alpha_max */ if (tridiag && k>0) { tf_dx1(x, n, 1, beta_max, alpha + (n*k-n)); for (j=k-1; j >= 1; j--) tf_dx1(x, n, k-j+1, alpha + (n*j), alpha + (n*j-n)); } else if (k>0) tf_dxtil(x, n, k, beta_max, alpha); /* u_max */ if (tridiag) for (j=0; j<k; j++) memset(u + (n*j), 0, (n-k+j) * sizeof(double)); else { for (i = 0; i < n; i++) u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]); if(family == FAMILY_LOGISTIC) for (i = 0; i < n; i++) u[i] *= logi_b2(beta_max[i]); else if(family == FAMILY_POISSON) for (i = 0; i < n; i++) u[i] *= pois_b2(beta_max[i]); glmgen_qrsol (Dkt_qr, u); // for (i = 0; i < n-k; i++) u[i] = 0; } } if (tridiag && k>0) { /* Setup tridiagonal systems */ A0 = (double*) malloc(n*k*sizeof(double)); A1 = (double*) malloc(n*k*sizeof(double)); for (j=2; j <= k; j++) { form_tridiag(x, n, k-j+2, 1, 1, A0+(n*j-n), A1+(n*j-n)); } } /* Iterate lower level functions over all lambda values; * the alpha and u vectors get used each time of subsequent * warm starts */ for (i = 0; i < nlambda; i++) { /* warm start */ double *beta_init = (i == 0) ? beta_max : beta + (i-1)*n; for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j]; if (tridiag) { form_tridiag(x, n, 1, rho * lambda[i], 0, A0, A1); for (j=0; j < n; j++) A0[j] = A0[j] + w[j]; } switch (family) { case FAMILY_GAUSSIAN: if (tridiag) tf_admm_gauss_tri(x, y, w, n, k, max_iter, lambda[i], df+i, beta+i*n, alpha, u, obj+i*(1+max_iter), iter+i, rho * lambda[i], obj_tol, A0, A1, verbose); else tf_admm_gauss(x, y, w, n, k, max_iter, lambda[i], df+i, beta+i*n, alpha, u, obj+i*(1+max_iter), iter+i, rho * lambda[i], obj_tol, DktDk, verbose); break; case FAMILY_LOGISTIC: tf_admm_glm(x, y, w, n, k, max_iter, lambda[i], tridiag, df+i, beta+i*n, alpha, u, obj+i*(1+max_iter_newton), iter+i, rho * lambda[i], obj_tol, obj_tol_newton, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton, DktDk, A0, A1, &logi_b, &logi_b1, &logi_b2, verbose); break; case FAMILY_POISSON: tf_admm_glm(x, y, w, n, k, max_iter, lambda[i], tridiag, df+i, beta+i*n, alpha, u, obj+i*(1+max_iter_newton), iter+i, rho * lambda[i], obj_tol, obj_tol_newton, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton, DktDk, A0, A1, &pois_b, &pois_b1, &pois_b2, verbose); break; default: printf("Unknown family, stopping calculation.\n"); status[i] = 2; } /* If there any NaNs in beta: reset beta, alpha, u */ if (has_nan(beta + i*n, n)) { double yc = weighted_mean(y,w,n); switch(family) { case FAMILY_POISSON: yc = (yc > 0) ? log(yc) : -DBL_MAX; break; case FAMILY_LOGISTIC: yc = (yc > 0) ? ( yc < 1 ? log(yc/(1-yc)) : DBL_MAX) : -DBL_MAX; break; default: break; } for (j = 0; j < n; j++) beta[i*n + j] = yc; for (j = 0; j < n-k; j++) alpha[j] = 0; for (j = 0; j < n; j++) u[j] = w[j] * (beta[i*n+j] - y[j]) / (rho * lambda[i]); glmgen_qrsol (Dkt_qr, u); if (tridiag) for (j = 0; j < n*k; j++) alpha[j] = u[j] = 0; status[i] = 1; } } cs_spfree(D); cs_spfree(Dt); cs_spfree(Dk); cs_spfree(Dkt); cs_spfree(DktDk); glmgen_gqr_free(Dt_qr); glmgen_gqr_free(Dkt_qr); free(beta_max); free(temp_n); free(alpha); free(u); free(v); if (tridiag && k>0) { free(A0); free(A1); } }
int main ( void ) { cs *A; cs *AT; cs *C; cs *D; cs *Eye; int i; int m; cs *T; printf ( "\n" ); printf ( "CS_DEMO1:\n" ); printf ( " Demonstration of the CSPARSE package.\n" ); /* Load the triplet matrix T from standard input. */ T = cs_load ( stdin ); /* Print T. */ printf ( "T:\n" ); cs_print ( T, 0 ); /* A = compressed-column form of T. */ A = cs_triplet ( T ); printf ( "A:\n" ); cs_print ( A, 0 ); /* Clear T. */ cs_spfree ( T ); /* AT = A'. */ AT = cs_transpose ( A, 1 ); printf ( "AT:\n" ); cs_print ( AT, 0 ); /* M = number of rows of A. */ m = A->m; /* Create triplet identity matrix. */ T = cs_spalloc ( m, m, m, 1, 1 ); for ( i = 0; i < m; i++ ) { cs_entry ( T, i, i, 1 ); } /* Eye = speye ( m ) */ Eye = cs_triplet ( T ); cs_spfree ( T ); /* Compute C = A * A'. */ C = cs_multiply ( A, AT ); /* Compute D = C + Eye * norm (C,1). */ D = cs_add ( C, Eye, 1, cs_norm ( C ) ); printf ( "D:\n" ); cs_print ( D, 0 ); /* Clear A, AT, C, D, Eye, */ cs_spfree ( A ); cs_spfree ( AT ); cs_spfree ( C ); cs_spfree ( D ); cs_spfree ( Eye ); /* Terminate. */ printf ( "\n" ); printf ( "CS_DEMO1:\n" ); printf ( " Normal end of execution.\n" ); return ( 0 ); }