// Coordinate descent for gaussian models SEXP cdfit_gaussian(SEXP X_, SEXP y_, SEXP penalty_, SEXP lambda, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP multiplier, SEXP alpha_, SEXP dfmax_, SEXP user_) { // Declarations int n = length(y_); int p = length(X_)/n; int L = length(lambda); SEXP res, beta, loss, iter; PROTECT(beta = allocVector(REALSXP, L*p)); double *b = REAL(beta); for (int j=0; j<(L*p); j++) b[j] = 0; PROTECT(loss = allocVector(REALSXP, L)); PROTECT(iter = allocVector(INTSXP, L)); for (int i=0; i<L; i++) INTEGER(iter)[i] = 0; double *a = Calloc(p, double); // Beta from previous iteration for (int j=0; j<p; j++) a[j]=0; double *X = REAL(X_); double *y = REAL(y_); const char *penalty = CHAR(STRING_ELT(penalty_, 0)); double *lam = REAL(lambda); double eps = REAL(eps_)[0]; int max_iter = INTEGER(max_iter_)[0]; double gamma = REAL(gamma_)[0]; double *m = REAL(multiplier); double alpha = REAL(alpha_)[0]; int dfmax = INTEGER(dfmax_)[0]; int user = INTEGER(user_)[0]; double *r = Calloc(n, double); for (int i=0; i<n; i++) r[i] = y[i]; double *z = Calloc(p, double); for (int j=0; j<p; j++) z[j] = crossprod(X, r, n, j)/n; int *e1 = Calloc(p, int); for (int j=0; j<p; j++) e1[j] = 0; int *e2 = Calloc(p, int); for (int j=0; j<p; j++) e2[j] = 0; double cutoff, l1, l2; int lstart; // If lam[0]=lam_max, skip lam[0] -- closed form sol'n available double rss = gLoss(r,n); if (user) { lstart = 0; } else { REAL(loss)[0] = rss; lstart = 1; } double sdy = sqrt(rss/n); // Path for (int l=lstart;l<L;l++) { R_CheckUserInterrupt(); if (l != 0) { // Assign a for (int j=0;j<p;j++) a[j] = b[(l-1)*p+j]; // Check dfmax int nv = 0; for (int j=0; j<p; j++) { if (a[j] != 0) nv++; } if (nv > dfmax) { for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER; res = cleanupG(a, r, e1, e2, z, beta, loss, iter); return(res); } // Determine eligible set if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lam[l-1]; if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - 
lam[l-1]); if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lam[l-1]); for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1; } else { // Determine eligible set double lmax = 0; for (int j=0; j<p; j++) if (fabs(z[j]) > lmax) lmax = fabs(z[j]); if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lmax; if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lmax); if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lmax); for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1; } while (INTEGER(iter)[l] < max_iter) { while (INTEGER(iter)[l] < max_iter) { while (INTEGER(iter)[l] < max_iter) { // Solve over the active set INTEGER(iter)[l]++; double maxChange = 0; for (int j=0; j<p; j++) { if (e1[j]) { z[j] = crossprod(X, r, n, j)/n + a[j]; // Update beta_j l1 = lam[l] * m[j] * alpha; l2 = lam[l] * m[j] * (1-alpha); if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(z[j], l1, l2, gamma, 1); if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(z[j], l1, l2, gamma, 1); if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(z[j], l1, l2, 1); // Update r double shift = b[l*p+j] - a[j]; if (shift !=0) { for (int i=0;i<n;i++) r[i] -= shift*X[j*n+i]; if (fabs(shift) > maxChange) maxChange = fabs(shift); } } } // Check for convergence for (int j=0; j<p; j++) a[j] = b[l*p+j]; if (maxChange < eps*sdy) break; } // Scan for violations in strong set int violations = 0; for (int j=0; j<p; j++) { if (e1[j]==0 & e2[j]==1) { z[j] = crossprod(X, r, n, j)/n; // Update beta_j l1 = lam[l] * m[j] * alpha; l2 = lam[l] * m[j] * (1-alpha); if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(z[j], l1, l2, gamma, 1); if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(z[j], l1, l2, gamma, 1); if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(z[j], l1, l2, 1); // If something enters the eligible set, update eligible set & residuals if (b[l*p+j] !=0) { e1[j] = e2[j] = 1; for (int i=0; i<n; i++) r[i] -= 
b[l*p+j]*X[j*n+i]; a[j] = b[l*p+j]; violations++; } } } if (violations==0) break; } // Scan for violations in rest int violations = 0; for (int j=0; j<p; j++) { if (e2[j]==0) { z[j] = crossprod(X, r, n, j)/n; // Update beta_j l1 = lam[l] * m[j] * alpha; l2 = lam[l] * m[j] * (1-alpha); if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(z[j], l1, l2, gamma, 1); if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(z[j], l1, l2, gamma, 1); if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(z[j], l1, l2, 1); // If something enters the eligible set, update eligible set & residuals if (b[l*p+j] !=0) { e1[j] = e2[j] = 1; for (int i=0; i<n; i++) r[i] -= b[l*p+j]*X[j*n+i]; a[j] = b[l*p+j]; violations++; } } } if (violations==0) { break; } } REAL(loss)[l] = gLoss(r, n); } res = cleanupG(a, r, e1, e2, z, beta, loss, iter); return(res); }
// Group descent for gaussian models
//
// Fits a path of group-penalized least-squares solutions, one per entry of
// `lambda`. Columns 0..K0-1 are updated without penalty; penalized group g
// covers columns K1[g]..K1[g+1]-1 and is updated by gd_gaussian().
//
// Arguments (all SEXP, unpacked below):
//   X_               numeric, read as n*p doubles and indexed X[n*j+i]
//   y_               numeric response of length n
//   penalty_         character penalty name, forwarded to gd_gaussian()
//   K1_              integer vector of J+1 group boundaries
//   K0_              number of leading unpenalized columns
//   lambda           numeric vector of L penalty values
//   alpha_           splits lambda into l1 = lambda*m*alpha and
//                    l2 = lambda*m*(1-alpha)
//   eps_             tolerance forwarded to checkConvergence()
//   max_iter_        integer cap on the iteration count for each lambda
//   gamma_           tuning parameter forwarded to gd_gaussian()
//   group_multiplier numeric vector of J per-group penalty multipliers
//   dfmax_, gmax_    stop the path once the previous solution has more than
//                    dfmax nonzero-group columns or more than gmax nonzero
//                    groups
//   user_            nonzero: fit lambda[0] as well; zero: skip lambda[0]
//
// Returns whatever cleanupG() builds from (beta, iter, df, loss). cleanupG()
// is defined elsewhere; presumably it also frees the Calloc'ed work arrays
// and balances the PROTECT calls -- confirm against its definition.
SEXP gdfit_gaussian(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda, SEXP alpha_, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP group_multiplier, SEXP dfmax_, SEXP gmax_, SEXP user_) {

  // Lengths/dimensions
  int n = length(y_);
  int L = length(lambda);
  int J = length(K1_) - 1;
  int p = length(X_)/n;

  // Pointers
  double *X = REAL(X_);
  double *y = REAL(y_);
  const char *penalty = CHAR(STRING_ELT(penalty_, 0));
  int *K1 = INTEGER(K1_);
  int K0 = INTEGER(K0_)[0];
  double *lam = REAL(lambda);
  double alpha = REAL(alpha_)[0];
  double eps = REAL(eps_)[0];
  int max_iter = INTEGER(max_iter_)[0];
  double gamma = REAL(gamma_)[0];
  double *m = REAL(group_multiplier);
  int dfmax = INTEGER(dfmax_)[0];
  int gmax = INTEGER(gmax_)[0];
  int user = INTEGER(user_)[0];

  // Outcome
  SEXP res, beta, iter, df, loss;
  PROTECT(beta = allocVector(REALSXP, L*p));
  for (int j=0; j<(L*p); j++) REAL(beta)[j] = 0;
  PROTECT(iter = allocVector(INTSXP, L));
  for (int i=0; i<L; i++) INTEGER(iter)[i] = 0;
  PROTECT(df = allocVector(REALSXP, L));
  for (int i=0; i<L; i++) REAL(df)[i] = 0;
  PROTECT(loss = allocVector(REALSXP, L));
  for (int i=0; i<L; i++) REAL(loss)[i] = 0;
  double *b = REAL(beta);

  // Intermediate quantities
  double *r = Calloc(n, double); // Residuals; start at y because beta = 0
  for (int i=0; i<n; i++) r[i] = y[i];
  double *a = Calloc(p, double); // Beta from previous iteration
  for (int j=0; j<p; j++) a[j] = 0;
  int *e = Calloc(J, int); // Active-group indicator; never reset along the path
  for (int g=0; g<J; g++) e[g] = 0;
  int lstart, ng, nv, violations;
  double shift, l1, l2;

  // If lam[0]=lam_max, skip lam[0] -- closed form sol'n available
  if (user) {
    lstart = 0;
  } else {
    REAL(loss)[0] = gLoss(r,n);
    lstart = 1;
  }

  // Path
  for (int l=lstart; l<L; l++) {
    R_CheckUserInterrupt();
    if (l != 0) {
      for (int j=0; j<p; j++) a[j] = b[(l-1)*p+j];

      // Check dfmax, gmax
      // A group counts as selected when its first coefficient is nonzero.
      ng = 0;
      nv = 0;
      for (int g=0; g<J; g++) {
        if (a[K1[g]] != 0) {
          ng++;
          nv = nv + (K1[g+1]-K1[g]);
        }
      }
      if (ng > gmax || nv > dfmax) {
        // Flag this and all remaining lambdas as not fitted
        for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER;
        res = cleanupG(a, r, e, beta, iter, df, loss);
        return(res);
      }
    }

    // Outer loop: re-scan inactive groups after the inner loop settles.
    // Inner loop: cycle over unpenalized columns and active groups.
    // Both are bounded by the same iteration budget.
    while (INTEGER(iter)[l] < max_iter) {
      while (INTEGER(iter)[l] < max_iter) {
        INTEGER(iter)[l]++;
        REAL(df)[l] = 0;

        // Update unpenalized covariates
        for (int j=0; j<K0; j++) {
          shift = crossprod(X, r, n, j)/n;
          b[l*p+j] = shift + a[j];
          for (int i=0; i<n; i++) r[i] -= shift * X[n*j+i];
          REAL(df)[l] += 1;
        }

        // Update penalized groups
        for (int g=0; g<J; g++) {
          l1 = lam[l] * m[g] * alpha;
          l2 = lam[l] * m[g] * (1-alpha);
          if (e[g]) gd_gaussian(b, X, r, g, K1, n, l, p, penalty, l1, l2, gamma, df, a);
        }

        // Check convergence
        if (checkConvergence(b, a, eps, l, p)) break;
        for (int j=0; j<p; j++) a[j] = b[l*p+j];
      }

      // Scan for violations
      violations = 0;
      for (int g=0; g<J; g++) {
        if (e[g]==0) {
          l1 = lam[l] * m[g] * alpha;
          l2 = lam[l] * m[g] * (1-alpha);
          gd_gaussian(b, X, r, g, K1, n, l, p, penalty, l1, l2, gamma, df, a);
          if (b[l*p+K1[g]] != 0) {
            e[g] = 1;
            violations++;
          }
        }
      }

      if (violations==0) break;
      for (int j=0; j<p; j++) a[j] = b[l*p+j];
    }

    // Record the loss from the final residuals on every exit path. If
    // max_iter runs out, loss[l] still describes the coefficients actually
    // returned instead of staying 0 or holding a value from before the last
    // violation scan changed the residuals.
    REAL(loss)[l] = gLoss(r, n);
  }
  res = cleanupG(a, r, e, beta, iter, df, loss);
  return(res);
}