void checkdata(int standardize){ int i,j; // means ysum = sum_dvec(Y,n); ybar = ysum/nd; xbar = new_dzero(p); for(j=0; j<p; j++){ for(i=xp[j]; i<xp[j+1]; i++) xbar[j] += xv[i]; xbar[j] *= 1.0/nd; } // dispersion xsd = new_dvec(p); for(j=0; j<p; j++){ H[j] = -nd*xbar[j]*xbar[j]; if(doxx) H[j] += xxv[j*(j+1)/2 + j]; else for(i=xp[j]; i<xp[j+1]; i++) H[j] += xv[i]*xv[i]; if(H[j]==0.0){ W[j] = INFINITY; xsd[j] = 1.0; } else xsd[j] = sqrt(H[j]/nd); } // to scale or not to scale if(!standardize) for(j=0; j<p; j++) xsd[j] = 1.0; }
double* drep(double val, int n) { int i; double *v = new_dvec(n); for(i=0; i<n; i++) v[i] = val; return v; }
double* new_dzero(int n) { int i; double *v = new_dvec(n); for(i=0; i<n; i++) v[i] = 0; return v; }
double* new_dseq(double from, double to, int n) { int i; assert( from <= to); double *v = new_dvec(n); v[0] = from; double by = (to-from)/((double) n-1); for(i=1; i<n; i++) v[i] = v[i-1] + by; return v; }
void gamlr(int *famid, // 1 gaus, 2 bin, 3 pois int *n_in, // nobs int *p_in, // nvar int *N_in, // length of nonzero x entries int *xi_in, // length-l row ids for nonzero x int *xp_in, // length-p+1 pointers to each column start double *xv_in, // nonzero x entry values double *y_in, // length-n y int *prexx, // indicator for pre-calc xx double *xxv_in, // dense columns of upper tri for xx double *eta, // length-n fixed shifts (assumed zero for gaussian) double *varweight, // length-p weights double *obsweight, // length-n weights int *standardize, // whether to scale penalty by sd(x_j) int *nlam, // length of the path double *delta, // path stepsize double *penscale, // gamma in the GL paper double *thresh, // cd convergence int *maxit, // cd max iterations double *lambda, // output lambda double *deviance, // output deviance double *df, // output df double *alpha, // output intercepts double *beta, // output coefficients int *exits, // exit status. 0 is normal int *verb) // talk? { dirty = 1; // flag to say the function has been called // time stamp for periodic R interaction time_t itime = time(NULL); /** Build global variables **/ fam = *famid; n = *n_in; p = *p_in; nd = (double) n; pd = (double) p; N = *N_in; W = varweight; V = obsweight; E = eta; Y = y_in; xi = xi_in; xp = xp_in; xv = xv_in; doxx = *prexx; xxv = xxv_in; H = new_dvec(p); checkdata(*standardize); A=0.0; B = new_dzero(p); G = new_dzero(p); ag0 = new_dzero(p); gam = *penscale; npass = itertotal = 0; // some local variables double Lold, NLLHD, Lsat; int s; // family dependent settings switch( fam ) { case 2: nllhd = &bin_nllhd; reweight = &bin_reweight; A = log(ybar/(1-ybar)); Lsat = 0.0; break; case 3: nllhd = &po_nllhd; reweight = &po_reweight; A = log(ybar); // nonzero saturated deviance Lsat = ysum; for(int i=0; i<n; i++) if(Y[i]!=0) Lsat += -Y[i]*log(Y[i]); break; default: fam = 1; // if it wasn't already nllhd = &lin_nllhd; A = (ysum - sum_dvec(eta,n))/nd; Lsat=0.0; } if(fam!=1){ Z = new_dvec(n); vxbar = new_dvec(p); vxz = new_dvec(p); } else{ Z = Y; vxz = new_dzero(p); if(V[0]!=0){ vxbar = new_dvec(p); vstats(); } else{ vxbar = xbar; vsum = nd; for(int j=0; j<p; j++) for(int i=xp[j]; i<xp[j+1]; i++) vxz[j] += xv[i]*Z[xi[i]]; } } l1pen = INFINITY; Lold = INFINITY; NLLHD = nllhd(n, A, E, Y); if(*verb) speak("*** n=%d observations and p=%d covariates ***\n", n,p); // move along the path for(s=0; s<*nlam; s++){ // deflate the penalty if(s>0) lambda[s] = lambda[s-1]*(*delta); l1pen = lambda[s]*nd; // run descent exits[s] = cdsolve(*thresh,*maxit); // update parameters and objective itertotal += npass; Lold = NLLHD; NLLHD = nllhd(n, A, E, Y); deviance[s] = 2.0*(NLLHD - Lsat); df[s] = dof(s, lambda, NLLHD); alpha[s] = A; copy_dvec(&beta[s*p],B,p); if(s==0) *thresh *= deviance[0]; // relativism // gamma lasso updating for(int j=0; j<p; j++) if(isfinite(gam)){ if( (W[j]>0.0) & isfinite(W[j]) ) W[j] = 1.0/(1.0+gam*fabs(B[j])); } else if(B[j]!=0.0){ W[j] = 0.0; } // verbalize if(*verb) speak("segment %d: lambda = %.4g, dev = %.4g, npass = %d\n", s+1, lambda[s], deviance[s], npass); // exit checks if(deviance[s]<0.0){ exits[s] = 1; shout("Warning: negative deviance. "); } if(df[s] >= nd){ exits[s] = 1; shout("Warning: saturated model. "); } if(exits[s]){ shout("Finishing path early.\n"); *nlam = s; break; } itime = interact(itime); } *maxit = itertotal; gamlr_cleanup(); }
double* new_dup_dvec(double *v, int n) { double* dv_new = new_dvec(n); copy_dvec(dv_new, v, n); return dv_new; }