C++ (Cpp) tf_dxtil Examples

Programming Language: C++ (Cpp)

Method/Function: tf_dxtil

Examples at hotexamples.com: 4

C++ (Cpp) tf_dxtil - 4 examples found. These are the top-rated real-world C++ (Cpp) examples of tf_dxtil, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: tf_admm.c Project: alexdeng/glmgen

/**
 * @brief Single-lambda solver for the Gaussian trend filtering problem.
 * Called by tf_admm for the Gaussian family, and intended as the inner
 * solver of tf_admm_glm for the logistic and Poisson losses. Handles one
 * value of lambda per call; typical users should go through tf_admm.
 *
 * For k=0 the ADMM loop is bypassed: the weighted dynamic-programming
 * routine produces beta directly and only obj[0] is written (iter is left
 * untouched in that case).
 *
 * @param y                    responses
 * @param x                    response locations; must be in increasing order
 * @param w                    sample weights
 * @param n                    common length of y, x, and w
 * @param k                    degree of the trend filter; e.g., k=1 is linear
 * @param max_iter             upper bound on ADMM iterations; unused when k=0
 * @param lam                  the lambda value to fit
 * @param beta                 output coefficients (length n); overwritten
 * @param alpha                ADMM alpha variable; read on entry (warm start) and updated
 * @param u                    ADMM u variable; read on entry (warm start) and updated
 * @param obj                  receives one objective value per iteration performed
 * @param iter                 receives the loop counter at exit: the zero-based
 *                             index of the last iteration when stopped early,
 *                             or max_iter when the loop runs to completion
 * @param rho                  ADMM tuning parameter
 * @param obj_tol              relative tolerance on successive objective values
 * @param DktDk                sparse matrix combined with rho and w to form the
 *                             linear system solved in the beta update
 * @param verbose              nonzero to print per-iteration progress
 * @return void
 * @see tf_admm
 */
void tf_admm_gauss (double * y, double * x, double * w, int n, int k,
       int max_iter, double lam,
       double * beta, double * alpha, double * u,
       double * obj, int * iter,
       double rho, double obj_tol, cs * DktDk, int verbose)
{
  int j;
  int step;
  double fit_term;
  double l1_term;
  double *diffs;
  double *buf_a;
  double *buf_b;
  cs *sys;
  gqr *sys_qr;

  /* Degree zero: the weighted DP routine gives the answer without ADMM */
  if (k == 0)
  {
    tf_dp_weight(n, y, w, lam, beta);

    diffs = (double *) malloc(n*sizeof(double));

    /* Weighted squared-error half-sum */
    fit_term = 0;
    for (j = 0; j < n; j++)
    {
      fit_term += w[j]*(y[j]-beta[j])*(y[j]-beta[j]);
    }
    fit_term = fit_term/2;

    /* The penalty is taken on differences of order k+1, not k */
    tf_dx(x, n, k+1, beta, diffs);
    l1_term = 0;
    for (j = 0; j < n-k-1; j++)
    {
      l1_term += fabs(diffs[j]);
    }

    obj[0] = fit_term+lam*l1_term;

    free(diffs);
    return;
  }

  /* General degree: build the linear system once, factor it, then iterate */
  sys = scalar_plus_diag(DktDk, rho, w);
  sys_qr = glmgen_qr(sys);

  /* Two length-n scratch vectors shared by all steps of an iteration */
  buf_a = (double*) malloc(n*sizeof(double));
  buf_b = (double*) malloc(n*sizeof(double));

  if (verbose)
  {
    printf("\nlambda=%0.3e\n",lam);
    printf("Iteration\tObjective\tLoss\tPenalty\n");
  }

  step = 0;
  while (step < max_iter)
  {
    /* beta step: assemble the right-hand side, then solve with the QR factors */
    for (j = 0; j < n-k; j++)
    {
      buf_a[j] = alpha[j] + u[j];
    }
    tf_dtxtil(x, n, k, buf_a, buf_b);
    for (j = 0; j < n; j++)
    {
      beta[j] = w[j]*y[j] + rho*buf_b[j];
    }
    glmgen_qrsol(sys_qr, beta);

    /* alpha step: 1d fused lasso (DP) on the shifted differences of beta */
    tf_dxtil(x, n, k, beta, buf_a);
    for (j = 0; j < n-k; j++) buf_b[j] = buf_a[j]-u[j];
    tf_dp(n-k, buf_b, lam/rho, alpha);

    /* u step: dual update */
    for (j = 0; j < n-k; j++) u[j] = u[j]+alpha[j]-buf_a[j];

    /* Objective = weighted loss + lam * L1 norm of order-(k+1) differences */
    fit_term = 0;
    for (j = 0; j < n; j++)
    {
      fit_term += w[j]*(y[j]-beta[j])*(y[j]-beta[j]);
    }
    fit_term = fit_term/2;

    tf_dx(x, n, k+1, beta, buf_b); /* order k+1 here as well */
    l1_term = 0;
    for (j = 0; j < n-k-1; j++)
    {
      l1_term += fabs(buf_b[j]);
    }
    obj[step] = fit_term+lam*l1_term;

    if (verbose) printf("%i\t%0.3e\t%0.3e\t%0.3e\n",step+1,obj[step],fit_term,lam*l1_term);

    /* From the second iteration on, stop once the relative change in the
     * objective falls below obj_tol; the counter is not advanced on exit */
    if (step > 0 && fabs(obj[step] - obj[step-1]) < fabs(obj[step]) * obj_tol)
    {
      break;
    }

    step++;
  }

  *iter = step;

  cs_spfree(sys);
  glmgen_gqr_free(sys_qr);
  free(buf_a);
  free(buf_b);
}

Example #2

Show file

File: tf_admm.c Project: dsimba/glmgen

/**
 * @brief Low level fitting routine for a Gaussian trend filtering problem.
 * Function used by tf_admm to fit a Gaussian ADMM trendfilter, or as a
 * subproblem by tf_admm_glm when using logistic or poisson losses. Fits
 * the solution for a single value of lambda. Most users will want to call
 * tf_admm, rather than tf_admm_gauss directly.
 *
 * The routine keeps track of the iterate with the lowest objective seen so
 * far and returns that iterate (beta and alpha) rather than the last one.
 * For k=0 the ADMM loop is skipped: only beta, df and obj[0] are written,
 * and iter is left untouched.
 *
 * @param x                    a vector of data locations; must be in increasing order
 * @param y                    a vector of responses
 * @param w                    a vector of sample weights
 * @param n                    the length of x, y, and w
 * @param k                    polynomial degree of the fitted trend; i.e., k=1 for linear
 * @param max_iter             maximum number of ADMM iterations; ignored for k=0
 * @param lam                  the value of lambda
 * @param df                   allocated space for df value at the solution
 * @param beta                 allocated space for output coefficients; must pre-fill as it is used in warm start
 * @param alpha                allocated space (n doubles are copied) for ADMM alpha variable; must pre-fill as it is used in warm start
 * @param u                    allocated space for ADMM u variable; must pre-fill as it is used in warm start
 * @param obj                  allocated space to store the objective; obj[0] holds the
 *                             objective at the warm start and obj[it+1] the objective
 *                             after iteration it, so up to max_iter+1 elements are written
 * @param iter                 allocated space to store the loop counter at exit; will fill just one element
 * @param rho                  tuning parameter for the ADMM algorithm; set to 1 for default
 * @param obj_tol              stopping criteria tolerance; set to 1e-10 for default
 * @param DktDk                pointer to the inner product of DktDk
 * @param verbose              0/1 flag for printing progress
 * @return void
 * @see tf_admm
 */
void tf_admm_gauss (double * x, double * y, double * w, int n, int k,
    int max_iter, double lam, int * df,
    double * beta, double * alpha, double * u,
    double * obj, int * iter,
    double rho, double obj_tol, cs * DktDk, int verbose)
{
  int i;
  int d;
  int it, itbest;
  double *v;
  double *z;
  double *betabest;
  double *alphabest;
  double descent;
  double variation;

  cs * kernmat;
  gqr * kernmat_qr;

  /* Special case for k=0: skip the ADMM algorithm */
  if (k==0)
  {
    /* Use Nick's DP algorithm, weighted version */
    tf_dp_weight(n,y,w,lam,beta);

    /* Compute df value: one plus the number of jumps in beta */
    d = 1;
    for (i=0; i<n-1; i++) if (beta[i] != beta[i+1]) d += 1;
    *df = d;

    /* Compute objective; v is scratch space for tf_obj_gauss */
    v = (double *) malloc(n*sizeof(double));
    obj[0] = tf_obj_gauss(x,y,w,n,k,lam,beta,v);
    free(v);
    return;
  }

  /* Otherwise we run our ADMM routine */

  /* Construct the kernel matrix and its QR decomposition */
  kernmat = scalar_plus_diag(DktDk, rho, w);
  kernmat_qr = glmgen_qr(kernmat);

  /* Other variables that will be useful during our iterations */
  v = (double*) malloc(n*sizeof(double));
  z = (double*) malloc(n*sizeof(double));
  betabest = (double*) malloc(n*sizeof(double));
  alphabest = (double*) malloc(n*sizeof(double));

  if (verbose) printf("\nlambda=%0.3e\n",lam);
  if (verbose) printf("Iteration\tObjective\n");

  /* The warm start is the initial "best" iterate */
  itbest = 0;
  obj[0] = tf_obj_gauss(x,y,w,n,k,lam,beta,v);
  memcpy(betabest, beta, n * sizeof(double));
  memcpy(alphabest, alpha, n * sizeof(double));

  for (it=0; it < max_iter; it++)
  {
    /* Update beta: banded linear system (kernel matrix) */
    for (i=0; i < n-k; i++) v[i] = alpha[i] + u[i];
    tf_dtxtil(x,n,k,v,z);
    for (i=0; i<n; i++) beta[i] = w[i]*y[i] + rho*z[i];
    /* Solve the least squares problem with sparse QR */
    glmgen_qrsol(kernmat_qr, beta);

    /* Update alpha: 1d fused lasso
     * Build the response vector */
    tf_dxtil(x,n,k,beta,v);
    for (i=0; i<n-k; i++) z[i] = v[i]-u[i];

    /* Use Nick's DP algorithm */
    tf_dp(n-k,z,lam/rho,alpha);

    /* Update u: dual update */
    for (i=0; i<n-k; i++) u[i] = u[i]+alpha[i]-v[i];

    /* Compute objective; slot it+1 because slot 0 holds the warm start */
    obj[it+1] = tf_obj_gauss(x,y,w,n,k,lam,beta,z);
    /* Report the objective just computed for this iteration */
    if (verbose) printf("%i\t%0.3e\n",it+1,obj[it+1]);

    /* Remember this iterate if it improves on the best objective so far */
    descent = obj[itbest] - obj[it+1];

    if ( descent > 0 )
    {
      memcpy(betabest, beta, n * sizeof(double));
      memcpy(alphabest, alpha, n * sizeof(double));
      itbest = it+1;
    }

    /* Stop once the total variation of the objective over the last 10
     * iterations is small relative to the best objective */
    if (it >= 10)
    {
      variation = 0;
      for (i=0; i < 10; i++ )
        variation += fabs(obj[it+1-i] - obj[it-i]);

      if (variation < fabs(obj[itbest]) * 10 * obj_tol)
        break;
    }
  }

  /* Hand back the best iterate rather than the last one */
  memcpy(beta, betabest, n * sizeof(double));
  memcpy(alpha, alphabest, n * sizeof(double));

  *iter = it;

  if (verbose)
    printf("itbest = %d it = %d obj[0]= %f  obj.best = %f\n",
           itbest, it, obj[0], obj[itbest]);

  /* Compute final df value, based on alpha */
  d = k+1;
  for (i=0; i<n-k-1; i++) if (alpha[i] != alpha[i+1]) d += 1;
  *df = d;

  cs_spfree(kernmat);
  glmgen_gqr_free(kernmat_qr);
  free(v);
  free(z);
  free(betabest);
  free(alphabest);
}

Example #3

Show file

File: tf_admm.c Project: alexdeng/glmgen

/**
 * @brief Main wrapper for fitting a trendfilter model.
 * Works with either a caller-supplied sequence of lambda tuning parameters
 * or a sequence generated here from the maximal lambda. The user must supply
 * allocated memory to store the output, with the function itself returning
 * only @c void. For default values, and an example of how to call the
 * function, see the function tf_admm_default.
 *
 * @param y                    a vector of responses
 * @param x                    a vector of response locations; must be in increasing order
 * @param w                    a vector of sample weights; assumed to contain no zeros
 * @param n                    the length of y, x, and w
 * @param k                    degree of the trendfilter; i.e., k=1 linear
 * @param family               family code for the type of fit (FAMILY_GAUSSIAN,
 *                             FAMILY_LOGISTIC or FAMILY_POISSON)
 * @param max_iter             maximum number of ADMM iterations; ignored for k=0
 * @param lam_flag             nonzero when lambda already holds the sequence to use;
 *                             0 to have this routine compute a log-spaced sequence
 *                             and write it into lambda
 * @param lambda               the lambda sequence when lam_flag is nonzero, or
 *                             allocated space of size nlambda when lam_flag=0
 * @param nlambda              number of lambda values; needed for both settings of lam_flag
 * @param lambda_min_ratio     ratio between min and max generated lambda; used only when lam_flag=0
 * @param beta                 allocated space of size n*nlambda to store the output coefficients
 * @param obj                  allocated space of size max_iter*nlambda to store the objective
 * @param iter                 allocated space of size nlambda to store the number of iterations
 * @param status               allocated space of size nlambda; entry i is set to 1 when
 *                             the fit for lambda[i] contained NaNs (and was reset) and
 *                             to 2 for an unknown family; it is not written otherwise
 * @param rho                  tuning parameter for the ADMM algorithm
 * @param obj_tol              stopping criteria tolerance
 * @param alpha_ls             for family != 0, line search tuning parameter
 * @param gamma_ls             for family != 0, line search tuning parameter
 * @param max_iter_ls          for family != 0, max number of iterations in line search
 * @param max_iter_newton      for family != 0, max number of iterations in inner ADMM
 * @param verbose              0/1 flag for printing progress
 * @return void
 * @see tf_admm_default
 */
void tf_admm (double * y, double * x, double * w, int n, int k, int family,
              int max_iter, int lam_flag, double * lambda,
              int nlambda, double lambda_min_ratio, double * beta,
              double * obj, int * iter, int * status, double rho,
              double obj_tol, double alpha_ls, double gamma_ls,
              int max_iter_ls, int max_iter_newton, int verbose)
{
  int i;
  int j;
  double max_lam;
  double min_lam;
  double * temp_n;
  double * beta_max;
  double * alpha;
  double * u;

  cs * D;
  cs * Dt;
  cs * Dk;
  cs * Dkt;
  cs * DktDk;
  gqr * Dt_qr;
  gqr * Dkt_qr;

  beta_max = (double *) malloc(n * sizeof(double));
  temp_n   = (double *) malloc(n * sizeof(double));
  alpha    = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */
  u        = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */

  /* Assume w does not have zeros */
  for(i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]);

  D = tf_calc_dk(n, k+1, x);
  Dk = tf_calc_dktil(n, k, x);
  Dt = cs_transpose(D, 1);
  diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */
  Dkt = cs_transpose(Dk, 1);
  Dt_qr = glmgen_qr(Dt);
  Dkt_qr = glmgen_qr(Dkt);
  DktDk = cs_multiply(Dkt,Dk);

  /* Determine the maximum lambda in the path, and initiate the path if needed
   * using the input lambda_min_ratio and equally spaced log points.
   */
  max_lam = tf_maxlam(n, y, Dt_qr, w);
  if (!lam_flag)
  {
    min_lam = max_lam * lambda_min_ratio;
    lambda[0] = max_lam;
    for (i = 1; i < nlambda; i++)
      lambda[i] = exp((log(max_lam) * (nlambda - i -1) + log(min_lam) * i) / (nlambda-1));

  }

  rho = rho * pow( (x[n-1] - x[0])/n, (double)k);

  /* Initiate alpha and u for a warm start */
  if (lambda[0] < max_lam * 1e-5)
  {
    for (i = 0; i < n - k; i++)
    {
      alpha[i] = 0;
      u[i] = 0;
    }
    /* beta_max seeds beta for the first lambda below; give it a defined
     * value here, since this branch does not otherwise compute it */
    for (i = 0; i < n; i++) beta_max[i] = 0;
  } else {

    /* beta_max */
    for (i = 0; i < n; i++) temp_n[i] = -sqrt(w[i]) * y[i];
    glmgen_qrsol (Dt_qr, temp_n);
    for (i = 0; i < n; i++) beta_max[i] = 0;
    cs_gaxpy(Dt, temp_n, beta_max);
    /* Dt has a W^{-1/2}, so in the next step divide by sqrt(w) instead of w. */
    for (i = 0; i < n; i++) beta_max[i] = y[i] - beta_max[i]/sqrt(w[i]);

    /* alpha_max */
    tf_dxtil(x, n, k, beta_max, alpha);

    /* u_max */
    switch (family)
    {
    case FAMILY_GAUSSIAN:
      for (i = 0; i < n; i++) u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);
      break;

    case FAMILY_LOGISTIC:
      for (i = 0; i < n; i++) {
        u[i] = logi_b2(beta_max[i]) * w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);
      }
      break;

    case FAMILY_POISSON:
      for (i = 0; i < n; i++) {
        u[i] = pois_b2(beta_max[i]) * w[i] *(beta_max[i] - y[i]) / (rho * lambda[0]);
      }
      break;

    default:
      /* Unknown family: flag every lambda and release everything allocated
       * above instead of returning directly */
      for (i = 0; i < nlambda; i++) status[i] = 2;
      goto cleanup;
    }

    glmgen_qrsol (Dkt_qr, u);
  }

  /* Iterate lower level functions over all lambda values;
   * the alpha and u vectors get used each time of subsequent
   * warm starts
   */
  for (i = 0; i < nlambda; i++)
  {
    /* warm start: previous solution, or beta_max for the first lambda */
    double * beta_init = (i == 0) ? beta_max : beta + (i-1)*n;
    for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j];

    switch (family)
    {
      case FAMILY_GAUSSIAN:
        tf_admm_gauss(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha,
                      u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol,
                      DktDk, verbose);
        break;

      case FAMILY_LOGISTIC:
        tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i,
                    rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
                    DktDk, &logi_b, &logi_b1, &logi_b2, verbose);
        break;

      case FAMILY_POISSON:
        tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i,
                    rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
                    DktDk, &pois_b, &pois_b1, &pois_b2, verbose);
        break;

      default:
        /* Same status code as the unknown-family exit above */
        status[i] = 2;
        break;
    }

    /* If there any NaNs in beta: reset beta, alpha, u */
    if(has_nan(beta + i * n, n))
    {
      for(j = 0; j < n; j++) beta[i*n + j] = 0;
      for(j = 0; j < n-k; j++) { alpha[j] = 0; u[j] = 0; }
      status[i] = 1;
      printf("Numerical error in lambda[%d]=%f",i,lambda[i]);
    }
  }

cleanup:
  cs_spfree(D);
  cs_spfree(Dt);
  cs_spfree(Dk);
  cs_spfree(Dkt);
  cs_spfree(DktDk);
  glmgen_gqr_free(Dt_qr);
  glmgen_gqr_free(Dkt_qr);

  free(temp_n);
  free(beta_max);
  free(alpha);
  free(u);
}

Example #4

Show file

File: tf_admm.c Project: dsimba/glmgen

/**
 * @brief Main wrapper for fitting a trendfilter model.
 * Takes as input either a sequence of lambda tuning parameters, or the number
 * of desired lambda values. In the latter case the function will also calculate
 * a lambda sequence. The user must supply allocated memory to store the output,
 * with the function itself returning only @c void. For default values, and an
 * example of how to call the function, see the function tf_admm_default.
 *
 * @param x                    a vector of data locations; must be in increasing order
 * @param y                    a vector of responses
 * @param w                    a vector of sample weights
 * @param n                    the length of x, y, and w
 * @param k                    polynomial degree of the fitted trend; i.e., k=1 for linear
 * @param family               family code for the type of fit; family=0 for OLS
 * @param max_iter             maximum number of ADMM interations; ignored for k=0
 * @param beta0                initialization value of beta for first lambda; ignored if NULL
 * @param lam_flag             0/1 flag for whether lambda sequence needs to be estimated
 * @param lambda               either a sequence of lambda when lam_flag=0, or empty
 *                             allocated space if lam_flag=1
 * @param nlambda              number of lambda values; need for both lam_flag=0 and 1
 * @param lambda_min_ratio     minimum ratio between min and max lambda; ignored for lam_flag=0
 * @param df                   allocated space of nlambda to store the output df values
 * @param beta                 allocated space of size n*nlambda to store the output coefficents
 * @param obj                  allocated space of size max_iter*nlambda to store the objective
 * @param iter                 allocated space of size nlambda to store the number of iterations
 * @param status               allocated space of size nlambda to store the status of each run
 * @param rho                  tuning parameter for the ADMM algorithm
 * @param obj_tol              stopping criteria tolerance
 * @param obj_tol_newton       for family != 0, stopping criteria tolerance for prox Newton
 * @param alpha_ls             for family != 0, line search tuning parameter
 * @param gamma_ls             for family != 0, line search tuning parameter
 * @param max_iter_ls          for family != 0, max number of iterations in line search
 * @param max_iter_newton       for family != 0, max number of iterations in inner ADMM
 * @param verbose              0/1 flag for printing progress
 * @return void
 * @see tf_admm_default
 */
void tf_admm ( double * x, double * y, double * w, int n, int k, int family,
    int max_iter, double * beta0, int lam_flag, double * lambda,
    int nlambda, double lambda_min_ratio, int tridiag, int * df,
    double * beta, double * obj, int * iter, int * status,
    double rho, double obj_tol, double obj_tol_newton, double alpha_ls, double gamma_ls,
    int max_iter_ls, int max_iter_newton, int verbose)
{
  int i;
  int j;
  int numDualVars;
  double max_lam;
  double min_lam;
  double * temp_n;
  double * beta_max;
  double * alpha;
  double * u;
  double * A0;
  double * A1;
  double * v;

  cs * D;
  cs * Dt;
  cs * Dk;
  cs * Dkt;
  cs * DktDk;
  gqr * Dt_qr;
  gqr * Dkt_qr;

  beta_max = (double *) malloc(n * sizeof(double));
  temp_n   = (double *) malloc(n * sizeof(double));
  v        = (double *) malloc(n * sizeof(double));

  numDualVars = tridiag ? k : 1;

  /* we use extra buffer below (n vs n-k) */
  alpha    = (double *) malloc(n * numDualVars * sizeof(double)); 
  u        = (double *) malloc(n * numDualVars * sizeof(double)); 

  /* Assume w does not have zeros */
  for (i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]);

  D 	= tf_calc_dk(n, k+1, x);
  Dk 	= tf_calc_dktil(n, k, x);
  Dt 	= cs_transpose(D, 1);

  diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */

  Dkt 	 = cs_transpose(Dk, 1);
  Dt_qr  = glmgen_qr(Dt);
  Dkt_qr = glmgen_qr(Dkt);
  DktDk  = cs_multiply(Dkt,Dk);


  /* Determine the maximum lambda in the path */
  max_lam = tf_maxlam(n, y, Dt_qr, w);
  /* and if it is too small, return a trivial solution for Gaussian case */
  if (family == FAMILY_GAUSSIAN) {
    if (max_lam < 1e-12) {
      for (i=0; i<nlambda; i++) {
        for (j=0; j<n; j++) beta[i*n+j] = y[j];
        obj[i*(max_iter+1)] = 0;
        df[i] = n;
      }
      cs_spfree(D);
      cs_spfree(Dt);
      cs_spfree(Dk);
      cs_spfree(Dkt);
      cs_spfree(DktDk);
      glmgen_gqr_free(Dt_qr);
      glmgen_gqr_free(Dkt_qr);
      free(temp_n);
      free(beta_max);
      free(alpha);
      free(u);
      return;
    }
  }
  else {		
    max_lam += 1;
  }

  /* Initiate the path if needed using the input lambda_min_ratio and 
   * equally spaced points in log space. */
  if (!lam_flag) seq_logspace(max_lam,lambda_min_ratio,nlambda,lambda);

  /* Augmented Lagrangian parameter */
  rho = rho * pow((x[n-1] - x[0])/(double)(n-1), (double)k);
  
  /* Initiate alpha and u for a warm start */
  if (lambda[0] < max_lam * 1e-5)  
    for (i = 0; i < n - k; i++) alpha[i] = u[i] = 0;    
  else {
    /* beta_max */
    if (beta0 == NULL)
      calc_beta_max(y,w,n,Dt_qr,Dt,temp_n,beta_max);
    else
      memcpy(beta_max, beta0, n*sizeof(double));

    /* Check if beta = weighted mean(y) is better than beta */
    double yc = weighted_mean(y,w,n);
    for (i = 0; i < n; i++) temp_n[i] = yc;
    double obj1 = tf_obj(x,y,w,n,k,max_lam,family,beta_max,v);
    double obj2 = tf_obj(x,y,w,n,k,max_lam,family,temp_n,v);
    if(obj2 < obj1) memcpy(beta_max, temp_n, n*sizeof(double));

    /* alpha_max */

    if (tridiag && k>0)
    {
      tf_dx1(x, n, 1, beta_max, alpha + (n*k-n));
      for (j=k-1; j >= 1; j--)
        tf_dx1(x, n, k-j+1, alpha + (n*j), alpha + (n*j-n));      
    }
    else if (k>0)
      tf_dxtil(x, n, k, beta_max, alpha);

    /* u_max */    

    if (tridiag)
      for (j=0; j<k; j++) memset(u + (n*j), 0, (n-k+j) * sizeof(double)); 
    else {
      for (i = 0; i < n; i++) 
          u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);

      if(family == FAMILY_LOGISTIC)
        for (i = 0; i < n; i++) u[i] *= logi_b2(beta_max[i]);
      else if(family == FAMILY_POISSON)
        for (i = 0; i < n; i++) u[i] *= pois_b2(beta_max[i]);
      glmgen_qrsol (Dkt_qr, u);
      // for (i = 0; i < n-k; i++) u[i] = 0;
    }
  }

  if (tridiag && k>0)
  {
    /* Setup tridiagonal systems */  
    A0 = (double*) malloc(n*k*sizeof(double));
    A1 = (double*) malloc(n*k*sizeof(double));

    for (j=2; j <= k; j++)
    {
      form_tridiag(x, n, k-j+2, 1, 1, A0+(n*j-n), A1+(n*j-n));
    }
  }  

  /* Iterate lower level functions over all lambda values;
   * the alpha and u vectors get used each time of subsequent
   * warm starts */
  for (i = 0; i < nlambda; i++)
  {    
    /* warm start */
    double *beta_init = (i == 0) ? beta_max : beta + (i-1)*n;
    for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j];

    if (tridiag)
    {
      form_tridiag(x, n, 1, rho * lambda[i], 0, A0, A1);
      for (j=0; j < n; j++) A0[j] = A0[j] + w[j];
    }

    switch (family) {
      case FAMILY_GAUSSIAN:
        if (tridiag)        
          tf_admm_gauss_tri(x, y, w, n, k, max_iter, lambda[i], df+i, beta+i*n,
              alpha, u, obj+i*(1+max_iter), iter+i, rho * lambda[i],
              obj_tol, A0, A1, verbose);        
        else
          tf_admm_gauss(x, y, w, n, k, max_iter, lambda[i], df+i, beta+i*n,
              alpha, u, obj+i*(1+max_iter), iter+i, rho * lambda[i],
              obj_tol, DktDk, verbose);

        break;

      case FAMILY_LOGISTIC:
        tf_admm_glm(x, y, w, n, k, max_iter, lambda[i], tridiag, df+i, beta+i*n,
            alpha, u, obj+i*(1+max_iter_newton), iter+i, rho * lambda[i], obj_tol,
            obj_tol_newton, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
            DktDk, A0, A1, &logi_b, &logi_b1, &logi_b2, verbose);
        break;

      case FAMILY_POISSON:
        tf_admm_glm(x, y, w, n, k, max_iter, lambda[i], tridiag, df+i, beta+i*n,
            alpha, u, obj+i*(1+max_iter_newton), iter+i, rho * lambda[i], obj_tol,
            obj_tol_newton, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
            DktDk, A0, A1, &pois_b, &pois_b1, &pois_b2, verbose);
        break;

      default:
        printf("Unknown family, stopping calculation.\n");
        status[i] = 2;
    }
    

    /* If there any NaNs in beta: reset beta, alpha, u */
    if (has_nan(beta + i*n, n))
    {
      double yc = weighted_mean(y,w,n);
      switch(family) {
        case FAMILY_POISSON:
          yc = (yc > 0) ? log(yc) : -DBL_MAX;
          break;
        case FAMILY_LOGISTIC:
          yc = (yc > 0) ? ( yc < 1 ? log(yc/(1-yc)) : DBL_MAX) : -DBL_MAX;
          break;
        default: break;
      }
      for (j = 0; j < n; j++) beta[i*n + j] = yc;
      for (j = 0; j < n-k; j++) alpha[j] = 0;
      for (j = 0; j < n; j++) u[j] = w[j] * (beta[i*n+j] - y[j]) / (rho * lambda[i]);
      glmgen_qrsol (Dkt_qr, u);
      if (tridiag) for (j = 0; j < n*k; j++) alpha[j] = u[j] = 0;
      status[i] = 1;
    }
  }

  cs_spfree(D);
  cs_spfree(Dt);
  cs_spfree(Dk);
  cs_spfree(Dkt);
  cs_spfree(DktDk);
  glmgen_gqr_free(Dt_qr);
  glmgen_gqr_free(Dkt_qr);

  free(beta_max);
  free(temp_n);
  free(alpha);
  free(u);
  free(v);

  if (tridiag && k>0)
  {
    free(A0);
    free(A1);
  }
}