/**
 * @brief Evaluate a Gaussian-loss trend filtering fit at new positions.
 * Normally reached through tf_predict rather than called directly.
 *
 * Each prediction is the sum of two parts: a polynomial part whose
 * coefficients come from poly_coefs(x, beta, k, .), and a falling factorial
 * part whose coefficients are the output of tf_dx(x, n, k+1, beta, .)
 * divided by glmgen_factorial(k).
 *
 * @param x         the original positions used in the fit; length n
 * @param beta      the beta vector for the prediction
 * @param n         number of observations
 * @param k         polynomial degree of the fitted trend
 * @param x0        the new positions to predict at
 * @param n0        number of entries in x0; the function returns
 *                  immediately, touching nothing, when n0 <= 0
 * @param pred      allocated space for the predicted values; entries
 *                  0 .. n0-1 are overwritten
 * @param zero_tol  falling factorial coefficients whose absolute value is
 *                  below this tolerance are set to exactly zero
 * @return void
 * @see tf_predict
 */
void tf_predict_gauss(double * x, double * beta, int n, int k,
                      double * x0, int n0, double * pred, double zero_tol)
{
  int pt;          /* index into x0 / pred */
  int term;        /* index of the current basis function */
  int m;           /* index inside a basis product */
  int n_ff;        /* number of falling factorial coefficients */
  double * poly;   /* polynomial coefficients */
  double * ff;     /* falling factorial coefficients */
  double scale;
  double prod;

  if (n0 <= 0) return;

  n_ff = n - k - 1;

  /* Polynomial coefficients */
  poly = malloc((k + 1) * sizeof(double));
  poly_coefs(x, beta, k, poly);

  /* Falling factorial coefficients: (k+1)st differences scaled by 1/k!,
   * with tiny values snapped to zero so they can be skipped below */
  ff = malloc((n) * sizeof(double));
  tf_dx(x, n, k + 1, beta, ff);
  scale = glmgen_factorial(k);
  for (term = 0; term < n_ff; term++)
  {
    ff[term] /= scale;
    if (fabs(ff[term]) < zero_tol) ff[term] = 0;
  }

  for (pt = 0; pt < n0; pt++)
  {
    pred[pt] = 0;

    /* Polynomial basis: term i is prod_{l<i} (x0 - x[l]) */
    for (term = 0; term <= k; term++)
    {
      prod = 1;
      for (m = 0; m < term; m++)
      {
        prod *= (x0[pt] - x[m]);
      }
      pred[pt] += poly[term] * prod;
    }

    /* Falling factorial basis: stop at the first basis function whose
     * knot x[term+k] is not strictly below the current point */
    for (term = 0; term < n_ff && !(x0[pt] <= x[term + k]); term++)
    {
      if (ff[term] == 0) continue;

      prod = 1;
      for (m = 1; m <= k; m++)
      {
        prod *= (x0[pt] - x[term + m]);
      }
      pred[pt] += ff[term] * prod;
    }
  }

  free(poly);
  free(ff);
}
/**
 * @brief Multiplies a vector by D tilde, without having to
 * explicitly construct or use the matrix D.
 *
 * The result of tf_dx(x, n, k, a, b) is computed first; when k > 0 each of
 * its first n-k entries is then rescaled in place as
 * b[i] * k / (x[k+i] - x[i]). For k <= 0 the tf_dx output is returned as is.
 *
 * @param x locations of the responses
 * @param n number of observations
 * @param k order of the trendfilter
 * @param a the input vector to multiply
 * @param b allocated space for the output
 * @return void
 * @see tf_dx
 */
void tf_dxtil(double *x, int n, int k,double *a, double *b)
{
  int row;
  int n_rows;
  double gap;

  tf_dx(x, n, k, a, b);

  /* No rescaling is applied for order zero */
  if (k <= 0) return;

  n_rows = n - k;
  for (row = 0; row < n_rows; row++)
  {
    gap = x[k + row] - x[row];
    /* Multiply by k before dividing, as a single left-to-right expression */
    b[row] = b[row] * k / gap;
  }
}
/**
 * @brief Low level fitting routine for non-Gaussian trend filtering problems.
 * Can be configured to handle arbitrary losses, as it takes the link function
 * and its first two derivatives as inputs. Fits the solution for a single value
 * of lambda. Most users will want to call tf_admm, rather than tf_admm_glm directly.
 *
 * Each outer iteration performs one proximal Newton step: a weighted Gaussian
 * subproblem is solved with tf_admm_gauss, the difference between that
 * solution and the current beta is used as a search direction, and the step
 * size along it is chosen by line_search.
 *
 * @param y                a vector of responses
 * @param x                a vector of response locations; must be in increasing order
 * @param w                a vector of sample weights
 * @param n                the length of y, x, and w
 * @param k                degree of the trendfilter; i.e., k=1 linear
 * @param max_iter         maximum number of ADMM iterations, passed to each
 *                         inner tf_admm_gauss call
 * @param lam              the value of lambda
 * @param beta             allocated space for output coefficients; must pre-fill as it is
 *                         used in warm start; updated in place
 * @param alpha            allocated space for ADMM alpha covariates; must pre-fill as it is
 *                         used in warm start; passed through to tf_admm_gauss
 * @param u                allocated space for ADMM u covariates; must pre-fill as it is
 *                         used in warm start; passed through to tf_admm_gauss
 * @param obj              allocated space to store the objective; obj[it] is written once per
 *                         outer (Newton) iteration, so up to max_iter_newton elements are filled
 * @param iter             allocated space for one int; receives the value of the outer loop
 *                         index on exit, i.e. the zero-based iteration at which the stopping
 *                         test fired, or max_iter_newton if it never fired
 * @param rho              tuning parameter for the ADMM algorithm; set to 1 for default
 * @param obj_tol          stopping criteria tolerance; set to 1e-10 for default
 * @param alpha_ls         line search tuning parameter, passed to line_search
 * @param gamma_ls         line search tuning parameter, passed to line_search
 * @param max_iter_ls      max number of iterations in line search
 * @param max_iter_newton  max number of outer (proximal Newton) iterations
 * @param DktDk            pointer to the inner product of DktDk
 * @param b                the link function for a given loss
 * @param b1               first derivative of the link function for a given loss
 * @param b2               second derivative of the link function for a given loss
 * @param verbose          0/1 flag for printing progress
 * @return void
 * @see tf_admm
 */
void tf_admm_glm (double * y, double * x, double * w, int n, int k,
    int max_iter, double lam,
    double * beta, double * alpha, double * u,
    double * obj, int * iter,
    double rho, double obj_tol,
    double alpha_ls, double gamma_ls, int max_iter_ls, int max_iter_newton,
    cs * DktDk,
    func_RtoR b, func_RtoR b1, func_RtoR b2,
    int verbose)
{
  double * d;        /* line search direction */
  double * yt;       /* working response: ytilde */
  double * H;        /* weighted Hessian */
  double * z;        /* scratch for the penalty term */
  double * obj_admm; /* objective trace of the inner ADMM solver */
  double * Db;       /* line search buffer */
  double * Dd;       /* line search buffer */
  int i;
  int it;
  int iter_ls;       /* iteration count reported by line_search */
  int iter_admm;     /* iteration count reported by tf_admm_gauss */
  int n_obj_admm;
  double loss;
  double pen;
  double t;          /* stepsize */

  d  = (double*) malloc(n*sizeof(double));
  yt = (double*) malloc(n*sizeof(double));
  H  = (double*) malloc(n*sizeof(double));
  z  = (double*) malloc(n*sizeof(double));

  /* Buffers for line search */
  Db = (double *) malloc(n*sizeof(double));
  Dd = (double *) malloc(n*sizeof(double));
  iter_ls = 0;

  /* tf_admm_gauss always writes obj[0] when k == 0, regardless of max_iter,
   * so the inner objective buffer must hold at least one element */
  n_obj_admm = (max_iter > 0) ? max_iter : 1;
  obj_admm = (double*) malloc(n_obj_admm*sizeof(double));

  if (verbose) printf("\nlambda=%0.3e\n",lam);
  if (verbose) printf("Iteration\tObjective\tLoss\tPenalty\tADMM iters\n");

  /* One Prox Newton step per iteration */
  for (it=0; it < max_iter_newton; it++)
  {
    /* Define weighted Hessian, and working response */
    for (i=0; i<n; i++)
    {
      H[i] = w[i] * b2(beta[i]);
      if (fabs(H[i])>WEIGHT_SMALL)
      {
        yt[i] = beta[i] + (y[i]-b1(beta[i]))/H[i];
      }
      else
      {
        /* Hessian weight too small to divide by: use the unscaled residual */
        yt[i] = beta[i] + (y[i]-b1(beta[i]));
      }
    }

    /* Prox Newton step: solve the weighted Gaussian subproblem into d.
     * iter_admm is reset first because tf_admm_gauss may return without
     * writing it on its k == 0 path. */
    iter_admm = 0;
    tf_admm_gauss (yt, x, H, n, k,
        max_iter, lam,
        d, alpha, u,
        obj_admm, &iter_admm, rho, obj_tol,
        DktDk, 0);

    /* Turn the subproblem solution into a direction relative to beta */
    for (i=0; i<n; i++)
    {
      d[i] = d[i] - beta[i];
    }

    t = line_search(y, x, w, n, k, lam, b, b1, beta, d,
        alpha_ls, gamma_ls, max_iter_ls, &iter_ls, Db, Dd);

    for (i=0; i<n; i++)
    {
      beta[i] = beta[i] + t * d[i];
    }

    /* Compute objective */
    /* Compute loss */
    loss = 0;
    for (i=0; i<n; i++) loss += w[i] * (-y[i]*beta[i] + b(beta[i]));

    /* Compute penalty */
    tf_dx(x,n,k+1,beta,z); /* IMPORTANT: use k+1 here! */
    pen = 0;
    for (i=0; i<n-k-1; i++) pen += fabs(z[i]);
    obj[it] = loss+lam*pen;

    if (verbose) printf("\t%i\t%0.3e\t%0.3e\t%0.3e\t%i\n",it+1,obj[it],loss,lam*pen,iter_admm);

    /* Stop once the relative change in the objective drops below obj_tol */
    if (it > 0)
    {
      if ( fabs(obj[it] - obj[it-1]) < fabs(obj[it]) * obj_tol )
      {
        break;
      }
    }
  }

  *iter = it;

  /* free */
  free(d);
  free(yt);
  free(H);
  free(z);
  free(Db);
  free(Dd);
  free(obj_admm);
}
/**
 * @brief Low level fitting routine for a Gaussian trend filtering problem.
 * Function used by tf_admm to fit a Gaussian ADMM trendfilter, or as a
 * subproblem by tf_admm_glm when using logistic or poisson losses. Fits
 * the solution for a single value of lambda. Most users will want to call
 * tf_admm, rather than tf_admm_gauss directly.
 *
 * For k == 0 the ADMM loop is skipped entirely: beta is obtained directly
 * from tf_dp_weight, only obj[0] is written, *iter is set to 0, and alpha, u,
 * max_iter, rho, obj_tol and DktDk are not used.
 *
 * @param y         a vector of responses
 * @param x         a vector of response locations; must be in increasing order
 * @param w         a vector of sample weights
 * @param n         the length of y, x, and w
 * @param k         degree of the trendfilter; i.e., k=1 linear
 * @param max_iter  maximum number of ADMM iterations; ignored for k=0
 * @param lam       the value of lambda
 * @param beta      allocated space for output coefficients; must pre-fill as it is used in warm start
 * @param alpha     allocated space for ADMM alpha covariates; must pre-fill as it is used in
 *                  warm start; the first n-k entries are read and updated
 * @param u         allocated space for ADMM u covariates; must pre-fill as it is used in
 *                  warm start; the first n-k entries are read and updated
 * @param obj       allocated space to store the objective; will fill at most max_iter elements
 *                  (exactly one element, obj[0], when k == 0)
 * @param iter      allocated space for one int; receives 0 when k == 0, otherwise the value of
 *                  the ADMM loop index on exit, i.e. the zero-based iteration at which the
 *                  stopping test fired, or max_iter if it never fired
 * @param rho       tuning parameter for the ADMM algorithm; set to 1 for default
 * @param obj_tol   stopping criteria tolerance; set to 1e-10 for default
 * @param DktDk     pointer to the inner product of DktDk
 * @param verbose   0/1 flag for printing progress
 * @return void
 * @see tf_admm
 */
void tf_admm_gauss (double * y, double * x, double * w, int n, int k,
    int max_iter, double lam,
    double * beta, double * alpha, double * u,
    double * obj, int * iter,
    double rho, double obj_tol,
    cs * DktDk, int verbose)
{
  int i;
  int it;
  double *v;
  double *z;
  double *db;
  double loss;
  double pen;

  cs * kernmat;
  gqr * kernmat_qr;

  /* Special case for k=0: skip the ADMM algorithm */
  if (k==0)
  {
    /* Use Nick's DP algorithm, weighted version */
    tf_dp_weight(n,y,w,lam,beta);

    db = (double *) malloc(n*sizeof(double));

    /* Compute objective */
    loss = 0; pen = 0;
    for (i=0; i<n; i++) loss += w[i]*(y[i]-beta[i])*(y[i]-beta[i]);
    loss = loss/2;
    tf_dx(x,n,k+1,beta,db); /* IMPORTANT: use k+1 here! */
    for (i=0; i<n-k-1; i++) pen += fabs(db[i]);
    obj[0] = loss+lam*pen;

    /* No ADMM iterations were run; always define the output so callers
     * never read an unset iteration count */
    *iter = 0;

    free(db);
    return;
  }

  /* Otherwise we run our ADMM routine */

  /* Construct the kernel matrix and its QR decomposition */
  kernmat = scalar_plus_diag(DktDk, rho, w);
  kernmat_qr = glmgen_qr(kernmat);

  /* Other variables that will be useful during our iterations */
  v = (double*) malloc(n*sizeof(double));
  z = (double*) malloc(n*sizeof(double));

  if (verbose) printf("\nlambda=%0.3e\n",lam);
  if (verbose) printf("Iteration\tObjective\tLoss\tPenalty\n");

  for (it=0; it < max_iter; it++)
  {
    /* Update beta: banded linear system (kernel matrix) */
    for (i=0; i < n-k; i++) v[i] = alpha[i] + u[i];
    tf_dtxtil(x,n,k,v,z);
    for (i=0; i<n; i++) beta[i] = w[i]*y[i] + rho*z[i];
    /* Solve the least squares problem with sparse QR */
    glmgen_qrsol(kernmat_qr, beta);

    /* Update alpha: 1d fused lasso
     * Build the response vector */
    tf_dxtil(x,n,k,beta,v);
    for (i=0; i<n-k; i++)
    {
      z[i] = v[i]-u[i];
    }
    /* Use Nick's DP algorithm */
    tf_dp(n-k,z,lam/rho,alpha);

    /* Update u: dual update */
    for (i=0; i<n-k; i++)
    {
      u[i] = u[i]+alpha[i]-v[i];
    }

    /* Compute loss */
    loss = 0;
    for (i=0; i<n; i++) loss += w[i]*(y[i]-beta[i])*(y[i]-beta[i]);
    loss = loss/2;

    /* Compute penalty */
    tf_dx(x,n,k+1,beta,z); /* IMPORTANT: use k+1 here! */
    pen = 0;
    for (i=0; i<n-k-1; i++) pen += fabs(z[i]);
    obj[it] = loss+lam*pen;

    if (verbose) printf("%i\t%0.3e\t%0.3e\t%0.3e\n",it+1,obj[it],loss,lam*pen);

    /* Stop if the change in the objective is strictly smaller than
     * obj_tol times the magnitude of the current objective */
    if (it > 0)
    {
      if ( fabs(obj[it] - obj[it-1]) < fabs(obj[it]) * obj_tol ) break;
    }
  }

  *iter = it;

  cs_spfree(kernmat);
  glmgen_gqr_free(kernmat_qr);
  free(v);
  free(z);
}