Example #1
0
// Coordinate descent for binomial models
SEXP cdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP lambda, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP multiplier, SEXP alpha_, SEXP dfmax_, SEXP user_, SEXP warn_) {

  // Declarations
  int n = length(y_);
  int p = length(X_)/n;
  int L = length(lambda);
  SEXP res, beta0, beta, Dev, iter;
  PROTECT(beta0 = allocVector(REALSXP, L));
  double *b0 = REAL(beta0);
  for (int i=0; i<L; i++) b0[i] = 0;
  PROTECT(beta = allocVector(REALSXP, L*p));
  double *b = REAL(beta);
  for (int j=0; j<(L*p); j++) b[j] = 0;
  PROTECT(Dev = allocVector(REALSXP, L));
  PROTECT(iter = allocVector(INTSXP, L));
  for (int i=0; i<L; i++) INTEGER(iter)[i] = 0;
  double *a = Calloc(p, double);    // Beta from previous iteration
  for (int j=0; j<p; j++) a[j] = 0;
  double a0 = 0;                    // Beta0 from previous iteration
  double *X = REAL(X_);
  double *y = REAL(y_);
  const char *penalty = CHAR(STRING_ELT(penalty_, 0));
  double *lam = REAL(lambda);
  double eps = REAL(eps_)[0];
  int max_iter = INTEGER(max_iter_)[0];
  double gamma = REAL(gamma_)[0];
  double *m = REAL(multiplier);
  double alpha = REAL(alpha_)[0];
  int dfmax = INTEGER(dfmax_)[0];
  int user = INTEGER(user_)[0];
  int warn = INTEGER(warn_)[0];
  double *r = Calloc(n, double);
  double *w = Calloc(n, double);
  double *s = Calloc(n, double);
  double *z = Calloc(p, double);
  double *eta = Calloc(n, double);
  int *e1 = Calloc(p, int);
  for (int j=0; j<p; j++) e1[j] = 0;
  int *e2 = Calloc(p, int);
  for (int j=0; j<p; j++) e2[j] = 0;
  double xwr, xwx, pi, u, v, cutoff, l1, l2, shift, si;
  int converged, lstart;

  // Initialization
  double ybar = sum(y, n)/n;
  a0 = b0[0] = log(ybar/(1-ybar));
  double nullDev = 0;
  for (int i=0;i<n;i++) nullDev = nullDev - y[i]*log(ybar) - (1-y[i])*log(1-ybar);
  for (int i=0; i<n; i++) s[i] = y[i] - ybar;
  for (int i=0; i<n; i++) eta[i] = a0;
  for (int j=0; j<p; j++) z[j] = crossprod(X, s, n, j)/n;

  // If lam[0]=lam_max, skip lam[0] -- closed form sol'n available
  if (user) {
    lstart = 0;
  } else {
    lstart = 1;
    REAL(Dev)[0] = nullDev;
  }

  // Path
  for (int l=lstart; l<L; l++) {
    if (l != 0) {
      // Assign a, a0
      a0 = b0[l-1];
      for (int j=0; j<p; j++) a[j] = b[(l-1)*p+j];

      // Check dfmax
      int nv = 0;
      for (int j=0; j<p; j++) {
	if (a[j] != 0) nv++;
      }
      if (nv > dfmax) {
	for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER;
	res = cleanupB(s, w, a, r, e1, e2, z, eta, beta0, beta, Dev, iter);
	return(res);
      }

      // Determine eligible set
      if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lam[l-1];
      if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lam[l-1]);
      if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lam[l-1]);
      for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1;
    } else {

      // Determine eligible set
      double lmax = 0;
      for (int j=0; j<p; j++) if (fabs(z[j]) > lmax) lmax = fabs(z[j]);
      if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lmax;
      if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lmax);
      if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lmax);
      for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1;
    }

    while (INTEGER(iter)[l] < max_iter) {
      while (INTEGER(iter)[l] < max_iter) {
	while (INTEGER(iter)[l] < max_iter) {
	  INTEGER(iter)[l]++;
	  REAL(Dev)[l] = 0;
	  for (int i=0;i<n;i++) {
	    if (eta[i] > 10) {
	      pi = 1;
	      w[i] = .0001;
	    } else if (eta[i] < -10) {
	      pi = 0;
	      w[i] = .0001;
	    } else {
	      pi = exp(eta[i])/(1+exp(eta[i]));
	      w[i] = pi*(1-pi);
	    }
	    s[i] = y[i] - pi;
	    r[i] = s[i]/w[i];
	    if (y[i]==1) REAL(Dev)[l] = REAL(Dev)[l] - log(pi);
	    if (y[i]==0) REAL(Dev)[l] = REAL(Dev)[l] - log(1-pi);
	  }
	  if (REAL(Dev)[l]/nullDev < .01) {
	    if (warn) warning("Model saturated; exiting...");
	    for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER;
	    res = cleanupB(s, w, a, r, e1, e2, z, eta, beta0, beta, Dev, iter);
	    return(res);
	  }

	  // Intercept
	  xwr = crossprod(w, r, n, 0);
	  xwx = sum(w, n);
	  b0[l] = xwr/xwx + a0;
	  for (int i=0; i<n; i++) {
	    si = b0[l] - a0;
	    r[i] -= si;
	    eta[i] += si;
	  }

	  // Covariates
	  for (int j=0; j<p; j++) {
	    if (e1[j]) {

	      // Calculate u, v
	      xwr = wcrossprod(X, r, w, n, j);
	      xwx = wsqsum(X, w, n, j);
	      u = xwr/n + (xwx/n)*a[j];
	      v = xwx/n;

	      // Update b_j
	      l1 = lam[l] * m[j] * alpha;
	      l2 = lam[l] * m[j] * (1-alpha);
	      if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(u, l1, l2, gamma, v);
	      if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(u, l1, l2, gamma, v);
	      if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(u, l1, l2, v);

	      // Update r
	      shift = b[l*p+j] - a[j];
	      if (shift !=0) {
		/* for (int i=0;i<n;i++) r[i] -= shift*X[j*n+i]; */
		/* for (int i=0;i<n;i++) eta[i] += shift*X[j*n+i]; */
		for (int i=0;i<n;i++) {
		  si = shift*X[j*n+i];
		  r[i] -= si;
		  eta[i] += si;
		}
	      }
	    }
	  }

	  // Check for convergence
	  converged = checkConvergence(b, a, eps, l, p);
	  a0 = b0[l];
	  for (int j=0; j<p; j++) a[j] = b[l*p+j];
	  if (converged) break;
	}

	// Scan for violations in strong set
	int violations = 0;
	for (int j=0; j<p; j++) {
	  if (e1[j]==0 & e2[j]==1) {
	    z[j] = crossprod(X, s, n, j)/n;
	    l1 = lam[l] * m[j] * alpha;
	    if (fabs(z[j]) > l1) {
	      e1[j] = e2[j] = 1;
	      violations++;
	    }
	  }
	}
	if (violations==0) break;
      }

      // Scan for violations in rest
      int violations = 0;
      for (int j=0; j<p; j++) {
	if (e2[j]==0) {
	  z[j] = crossprod(X, s, n, j)/n;
	  l1 = lam[l] * m[j] * alpha;
	  if (fabs(z[j]) > l1) {
	    e1[j] = e2[j] = 1;
	    violations++;
	  }
	}
      }
      if (violations==0) break;
    }
  }
  res = cleanupB(s, w, a, r, e1, e2, z, eta, beta0, beta, Dev, iter);
  return(res);
}
Example #2
0
// Coordinate descent for gaussian models
SEXP cdfit_gaussian(SEXP X_, SEXP y_, SEXP penalty_, SEXP lambda, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP multiplier, SEXP alpha_, SEXP dfmax_, SEXP user_) {

  // Declarations
  int n = length(y_);
  int p = length(X_)/n;
  int L = length(lambda);
  SEXP res, beta, loss, iter;
  PROTECT(beta = allocVector(REALSXP, L*p));
  double *b = REAL(beta);
  for (int j=0; j<(L*p); j++) b[j] = 0;
  PROTECT(loss = allocVector(REALSXP, L));
  PROTECT(iter = allocVector(INTSXP, L));
  for (int i=0; i<L; i++) INTEGER(iter)[i] = 0;
  double *a = Calloc(p, double); // Beta from previous iteration
  for (int j=0; j<p; j++) a[j]=0;
  double *X = REAL(X_);
  double *y = REAL(y_);
  const char *penalty = CHAR(STRING_ELT(penalty_, 0));
  double *lam = REAL(lambda);
  double eps = REAL(eps_)[0];
  int max_iter = INTEGER(max_iter_)[0];
  double gamma = REAL(gamma_)[0];
  double *m = REAL(multiplier);
  double alpha = REAL(alpha_)[0];
  int dfmax = INTEGER(dfmax_)[0];
  int user = INTEGER(user_)[0];
  double *r = Calloc(n, double);
  for (int i=0; i<n; i++) r[i] = y[i];
  double *z = Calloc(p, double);
  for (int j=0; j<p; j++) z[j] = crossprod(X, r, n, j)/n;
  int *e1 = Calloc(p, int);
  for (int j=0; j<p; j++) e1[j] = 0;
  int *e2 = Calloc(p, int);
  for (int j=0; j<p; j++) e2[j] = 0;
  double cutoff, l1, l2;
  int lstart;

  // If lam[0]=lam_max, skip lam[0] -- closed form sol'n available
  double rss = gLoss(r,n);
  if (user) {
    lstart = 0;
  } else {
    REAL(loss)[0] = rss;
    lstart = 1;
  }
  double sdy = sqrt(rss/n);

  // Path
  for (int l=lstart;l<L;l++) {
    R_CheckUserInterrupt();
    if (l != 0) {
      // Assign a
      for (int j=0;j<p;j++) a[j] = b[(l-1)*p+j];

      // Check dfmax
      int nv = 0;
      for (int j=0; j<p; j++) {
	if (a[j] != 0) nv++;
      }
      if (nv > dfmax) {
	for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER;
	res = cleanupG(a, r, e1, e2, z, beta, loss, iter);
	return(res);
      }

      // Determine eligible set
      if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lam[l-1];
      if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lam[l-1]);
      if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lam[l-1]);
      for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1;
    } else {
      // Determine eligible set
      double lmax = 0;
      for (int j=0; j<p; j++) if (fabs(z[j]) > lmax) lmax = fabs(z[j]);
      if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lmax;
      if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lmax);
      if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lmax);
      for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1;
    }

    while (INTEGER(iter)[l] < max_iter) {
      while (INTEGER(iter)[l] < max_iter) {
	while (INTEGER(iter)[l] < max_iter) {
	  // Solve over the active set
	  INTEGER(iter)[l]++;
          double maxChange = 0;
	  for (int j=0; j<p; j++) {
	    if (e1[j]) {
	      z[j] = crossprod(X, r, n, j)/n + a[j];

	      // Update beta_j
	      l1 = lam[l] * m[j] * alpha;
	      l2 = lam[l] * m[j] * (1-alpha);
	      if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(z[j], l1, l2, gamma, 1);
	      if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(z[j], l1, l2, gamma, 1);
	      if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(z[j], l1, l2, 1);

	      // Update r
	      double shift = b[l*p+j] - a[j];
	      if (shift !=0) {
                for (int i=0;i<n;i++) r[i] -= shift*X[j*n+i];
                if (fabs(shift) > maxChange) maxChange = fabs(shift);
              }
	    }
	  }

	  // Check for convergence
	  for (int j=0; j<p; j++) a[j] = b[l*p+j];
	  if (maxChange < eps*sdy) break;
	}

	// Scan for violations in strong set
	int violations = 0;
	for (int j=0; j<p; j++) {
	  if (e1[j]==0 & e2[j]==1) {

	    z[j] = crossprod(X, r, n, j)/n;

	    // Update beta_j
	    l1 = lam[l] * m[j] * alpha;
	    l2 = lam[l] * m[j] * (1-alpha);
	    if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(z[j], l1, l2, gamma, 1);
	    if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(z[j], l1, l2, gamma, 1);
	    if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(z[j], l1, l2, 1);

	    // If something enters the eligible set, update eligible set & residuals
	    if (b[l*p+j] !=0) {
	      e1[j] = e2[j] = 1;
	      for (int i=0; i<n; i++) r[i] -= b[l*p+j]*X[j*n+i];
	      a[j] = b[l*p+j];
	      violations++;
	    }
	  }
	}
	if (violations==0) break;
      }

      // Scan for violations in rest
      int violations = 0;
      for (int j=0; j<p; j++) {
	if (e2[j]==0) {

	  z[j] = crossprod(X, r, n, j)/n;

	  // Update beta_j
	  l1 = lam[l] * m[j] * alpha;
	  l2 = lam[l] * m[j] * (1-alpha);
	  if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(z[j], l1, l2, gamma, 1);
	  if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(z[j], l1, l2, gamma, 1);
	  if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(z[j], l1, l2, 1);

	  // If something enters the eligible set, update eligible set & residuals
	  if (b[l*p+j] !=0) {
	    e1[j] = e2[j] = 1;
	    for (int i=0; i<n; i++) r[i] -= b[l*p+j]*X[j*n+i];
	    a[j] = b[l*p+j];
	    violations++;
	  }
	}
      }

      if (violations==0) {
	break;
      }
    }
    REAL(loss)[l] = gLoss(r, n);
  }
  res = cleanupG(a, r, e1, e2, z, beta, loss, iter);
  return(res);
}