int lr_train_update_b( lr_train *lrt)
{
  /* X,w,z -> b */
  /*
                  [1t]                  [1t]
     Compute b = (([--] W [1|X])^-1)  *  [--] W z,  where W = diag(w).
                  [Xt]                  [Xt]

     The solve is delegated to the conjugate-gradient helper.  The solution
     vector it returns holds the constant term in slot 0 and the per-attribute
     coefficients in slots 1..numatts-1; it is unpacked into (b0, b) on lrt.

     Returns -2 if the iteration count reported by the helper exceeds
     opts->cgmax, and 1 otherwise.
  */
  const int natts = lrt->numatts;
  int k, cgiters;
  dyv *start = NULL;
  dyv *solution;

  /* cgeps is handed over unscaled: scaling by the initial CG residual is
     left to the CG result routine rather than done here. */

  /* Optionally seed CG with the current parameters, packed as (b0, b). */
  if (lrt->opts->cgbinit) {
    start = mk_dyv( natts);
    dyv_set( start, 0, lrt_b0_ref(lrt));
    for (k = 1; k < natts; ++k) {
      dyv_set( start, k, dyv_ref( lrt_b_ref(lrt), k-1));
    }
  }

  solution = mk_lr_update_b_conjugate_gradient_helper( lrt,
                                                       lrt->opts->cgeps,
                                                       lrt->opts->cgdeveps,
                                                       lrt->opts->cgmax,
                                                       &cgiters, start);
  if (start != NULL) free_dyv( start);

  /* Unpack the solution into ( b0, b ). */
  lrt_b0_set(lrt, dyv_ref( solution, 0));
  for (k = 1; k < natts; ++k) {
    dyv_set( lrt_b_ref(lrt), k-1, dyv_ref( solution, k));
  }
  free_dyv( solution);

  /* Hitting cgmax is considered a failure. */
  return ( cgiters > lrt->opts->cgmax) ? -2 : 1;
}
void lr_cg_multA( const dyv *v, dyv *result, void *userdata)
{
  /* Matrix-vector product callback: writes A*v into result, where A*v is
     the product returned by mk_lr_XtWXv_dyv() plus, when opts->rrlambda is
     positive, a ridge term lambda*v whose first component (the constant
     term) is zeroed.  userdata must point at the lr_train being fitted. */
  lr_train *lrt = (lr_train *) userdata;
  dyv *xtwxv = mk_lr_XtWXv_dyv( lrt, v);   /* sparse matrix-vector multiply */
  double ridge = lrt->opts->rrlambda;

  if (!(ridge > 0.0)) {
    /* No Ridge Regression: the plain product is the answer. */
    copy_dyv( xtwxv, result);
  }
  else {
    /* Add the Ridge Regression term, leaving the constant term unpenalized. */
    dyv *penalty = mk_dyv_scalar_mult( v, ridge);
    dyv_set( penalty, 0, 0.0);
    dyv_plus( xtwxv, penalty, result);
    free_dyv( penalty);
  }

  free_dyv( xtwxv);
}
lr_predict *mk_in_lr_predict( PFILE *f)
{
  /* Reads a vector from f with mk_dyv_read() and splits it into a newly
     allocated lr_predict: element 0 becomes b0, the remaining elements
     become b (which therefore has one element fewer than the vector read).

     Only the b0 and b fields are assigned here. */
  int i, size;
  double val;
  dyv *dv, *b;
  lr_predict *lrp;

  dv = mk_dyv_read( f);
  size = dyv_size( dv);

  /* At least the constant term must be present; otherwise reading element
     0 and creating a vector of size-1 elements below would be invalid. */
  if (size < 1) {
    my_errorf( "mk_in_lr_predict: expected at least 1 value (b0), got %d",
               size);
  }

  lrp = AM_MALLOC( lr_predict);
  lrp->b0 = dyv_ref( dv, 0);

  b = mk_dyv( size-1);
  for (i=1; i<size; ++i) {
    val = dyv_ref( dv, i);
    dyv_set( b, i-1, val);
  }
  lrp->b = b;

  free_dyv( dv);
  return lrp;
}
void lr_train_update_z( lr_train *lrt)
{
  /* y,n,u,w -> z */
  /* For every row, z = n + (y - u) / w.  Unless AMFAST is defined, a
     non-numeric result (NaN or Inf) is reported through my_errorf(). */
  int row;
  double y_r, n_r, u_r, w_r, z_r;

  row = 0;
  while (row < lrt->numrows) {
    y_r = dyv_ref( lrt->y, row);
    n_r = dyv_ref( lrt_n_ref(lrt), row);
    u_r = dyv_ref( lrt_u_ref(lrt), row);
    w_r = dyv_ref( lrt_w_ref(lrt), row);

    z_r = n_r + (y_r-u_r) / w_r;

#ifndef AMFAST
    if (!am_isnum( z_r)) {
      my_errorf( "lr_train_update_z: NaN or Inf problem: val is %f.\n"
                 "Inputs: i=%d, yi=%f, ni=%f, ui=%f, wi=%f\n",
                 z_r, row, y_r, n_r, u_r, w_r);
    }
#endif

    dyv_set( lrt_z_ref(lrt), row, z_r);
    ++row;
  }
}
integ *mk_integ( double (*h)(double parameter,double constant,double x), double xlo, double xhi, double parameter, double constant, int size ) /* Returns an it in which it->integral[i] = integal_from_xlo_to(x_lo + h*i) of h(parameter,x) dx ------------------------------------------------ integal_from_xlo_to_x_hi of h(parameter,x) dx */ { integ *it = AM_MALLOC(integ); dyv *dig = mk_dyv(size); int i; double sum = 0.0; double last_pdf = 0.0; double delta = (xhi - xlo) / (size-1); if ( h(parameter,constant,xhi) > 1e-6 ) my_error("Hmm... I was really hoping h(parameter,xhi) == 0"); dyv_set(dig,0,0.0); for ( i = 1 ; i < size ; i++ ) { double x = xlo + i * delta; double this_pdf = h(parameter,constant,x); if (i == 1) sum += delta * this_pdf; else sum += delta * (this_pdf + last_pdf) / 2.0; dyv_set(dig,i,sum); last_pdf = this_pdf; /* added 2/26/97 JGS */ } dyv_scalar_mult(dig,1.0 / sum,dig); it -> integral = dig; it -> xlo = xlo; it -> xhi = xhi; it -> parameter = parameter; it -> constant = constant; return(it); }
void lr_train_update_w( lr_train *lrt)
{
  /* u -> w */
  /* For every row, w = u * (1 - u). */
  int row = 0;

  while (row < lrt->numrows) {
    const double u_r = dyv_ref( lrt_u_ref(lrt), row);
    const double w_r = u_r * (1-u_r);
    dyv_set( lrt_w_ref(lrt), row, w_r);
    ++row;
  }
}
void out_lr_predict( PFILE *f, lr_predict *lrp)
{
  /* Writes the model in lrp to f as a single vector: b0 in slot 0 followed
     by the elements of b, using dyv_write(). */
  const int total = dyv_size( lrp->b) + 1;
  dyv *packed = mk_dyv( total);
  int k;

  /* Pack ( b0, b ) into one dyv. */
  dyv_set( packed, 0, lrp->b0);
  for (k = 0; k < total-1; ++k) {
    double coef = dyv_ref( lrp->b, k);
    dyv_set( packed, k+1, coef);
  }

  dyv_write( f, packed);
  free_dyv( packed);
}
void lr_compute_u_from_n( dyv *n, dyv *u)
{
  /* n -> u */
  /* For each element of n, stores exp(n_i) / (1 + exp(n_i)) in the matching
     slot of u.  u must already have at least dyv_size(n) elements.

     The value is computed in a form that never exponentiates a positive
     argument: for large positive n_i, exp(n_i) overflows to infinity and
     the naive quotient becomes Inf/Inf = NaN, whereas the mathematically
     identical 1 / (1 + exp(-n_i)) correctly approaches 1.  For negative
     n_i the direct form is kept, since exp(n_i) can only underflow toward
     0 there. */
  int numrows, i;
  double en, val, ni;

  numrows = dyv_size( n);
  for (i=0; i < numrows; ++i) {
    ni = dyv_ref( n, i);
    if (ni >= 0.0) {
      val = 1.0 / (1.0 + exp(-ni));
    }
    else {
      /* Also reached for NaN input, which propagates as NaN. */
      en = exp(ni);
      val = en / (1.0 + en);
    }
    dyv_set( u, i, val);
  }

  return;
}
void lr_compute_n_from_dym( const dym *M, double b0, dyv *b, dyv *n)
{
  /* Dense case: for every row of M, stores in n the dot product of that
     row's first dyv_size(b) entries with b, plus the constant term b0. */
  const int nrows = dym_rows( M);
  const int ncoefs = dyv_size( b);
  int r, c;

  for (r = 0; r < nrows; ++r) {
    double acc = 0.0;

    c = 0;
    while (c < ncoefs) {
      acc += dym_ref( M, r, c) * dyv_ref( b, c);
      ++c;
    }
    acc += b0;

    dyv_set( n, r, acc);
  }
}
void diag_precond( const dyv *v, dyv *result, void *userdata)
{
  /* Get diagonal

           ( [1t]          )
       diag( [--] W [1|X]  ) = [ m_ii = Sum(x_ki^2 * w_k over k) ]
           ( [Xt]          )

     In the sparse case, X is binary and x_ki^2 == x_ki, and the diagonal
     is [ m_ii = Sum(w_k over posrows_i) ].

     The preconditioning matrix is this diagonal matrix.  Applying its
     inverse to v is an element-wise division, written into result.
     Slot 0 (the constant term) is divided by the sum of all weights.
     Only the sparse representation (lrt->X) is supported. */
  lr_train *lrt = (lr_train *) userdata;
  dyv *weights;
  double denom, numer;
  int col;

  if (lrt->X == NULL) {
    my_error( "diag_precond: dense problems not yet supported.");
  }

  weights = lrt_w_ref( lrt);

  /* Constant-term entry. */
  numer = dyv_ref( v, 0);
  denom = dyv_sum( weights);
  dyv_set( result, 0, numer / denom);

  /* Attribute entries: vector slot col corresponds to attribute col-1. */
  for (col = 1; col < lrt->numatts; ++col) {
    ivec *rows = spardat_attnum_to_posrows( lrt->X, col-1);
    denom = dyv_partial_sum( weights, rows);
    numer = dyv_ref( v, col);
    dyv_set( result, col, numer / denom);
  }
}
void lr_compute_n_from_spardat( const spardat *X, double b0, dyv *b, dyv *n)
{
  /* Sparse case: for every row of X, stores in n the partial sum of b over
     that row's positive attributes, plus the constant term b0. */
  const int nrows = spardat_num_rows( X);
  int r = 0;

  while (r < nrows) {
    /* Historical note: at one time a copy of the posatts ivec was made
       here because of a very mysterious bug that looked like a hardware
       or compiler problem. */
    ivec *active = spardat_row_to_posatts( X, r);
    double total = dyv_partial_sum( b, active);

    total += b0;
    dyv_set( n, r, total);
    ++r;
  }
}
dyv *mk_dyv_read( PFILE *f)
{
  /* Reads a vector from f in a line-oriented text format: the first line
     holds the number of elements, and each following line holds one value.
     Lines are parsed with atoi()/atof().  Returns a newly made dyv.

     Problems (premature end-of-file, failed read, negative size) are
     reported through my_errorf().

     NOTE(review): each read asks pfgets() for at most 100 characters, so a
     longer physical line would presumably be consumed in pieces -- confirm
     against pfgets() semantics if long lines are possible. */
  int i, size, lineno;
  double val;
  char line[101];
  dyv *dv;

  /* lineno is the 1-based number of the NEXT line to be read, so the number
     of lines successfully read so far is lineno-1. */
  lineno = 1;
  line[100] = '\0';

  /* Read size and make dyv. */
  if (pfeof(f)) {
    my_errorf( "mk_dyv_read: unexpected end-of-file while reading size,\n"
               "after line %d of file", lineno-1);
  }
  if (pfgets( line, 100, f) == NULL) {
    my_errorf( "mk_dyv_read: failed to read line %d from the passed stream.",
               lineno);
  }
  else lineno++;

  size = atoi( line);
  if (size < 0) {
    /* A negative element count cannot describe a vector. */
    my_errorf( "mk_dyv_read: invalid negative size %d on line %d of file",
               size, lineno-1);
  }
  dv = mk_dyv( size);

  /* Read values. */
  for (i=0; i<size; ++i) {
    if (pfeof(f)) {
      /* i values have been read so far, from lineno-1 lines in total. */
      my_errorf( "mk_dyv_read: unexpected end-of-file while reading %d vals,\n"
                 "after line %d of file (after the %dth value)",
                 size, lineno-1, i);
    }
    if (pfgets( line, 100, f) == NULL) {
      my_errorf( "mk_dyv_read: failed to read line %d from the passed stream.",
                 lineno);
    }
    else lineno++;

    val = atof( line);
    dyv_set( dv, i, val);
  }

  return dv;
}
dyv *mk_dyv_x( int size, ...)
{
  /* Builds a dyv of `size` elements from the variadic arguments, in order.

     Warning: the compiler cannot type-check the variadic arguments.  Every
     value *must* be passed as a double (e.g. 1.0, not 1) or the results
     are wrong. */
  va_list ap;
  dyv *vec = mk_dyv( size);
  int slot = 0;

  va_start( ap, size);
  while (slot < size) {
    double next = va_arg( ap, double);
    dyv_set( vec, slot, next);
    ++slot;
  }
  va_end( ap);

  return vec;
}
dyv *mk_lr_cgresult_cgeps( lr_train *lrt, double unscaled_cgeps,
                           int maxiters, conjgrad *cg)
{
  /* Runs conjugate gradient on cg using a residual-based stopping rule and
     returns a copy of the parameter vector whose recorded residual norm was
     smallest.

     The tolerance is unscaled_cgeps times the initial residual norm.
     Iteration continues while the residual norm is at least that tolerance,
     and also stops when
       - the history is full (iters >= maxiters),
       - opts->cgwindow consecutive iterations failed to match or beat the
         best residual seen in the loop, or
       - the residual exceeds opts->cgdecay times that best residual.

     The caller owns the returned dyv. */
  int step, best_step, patience;
  double resnorm, best_resnorm, growth, tol, give_up_above;
  dyv *best_x, *answer, *resnorm_hist;
  dyv_array *x_hist;

  /* Initialize parameters. */
  resnorm_hist  = mk_constant_dyv( maxiters, FLT_MAX);
  x_hist        = mk_array_of_null_dyvs( maxiters);
  best_resnorm  = FLT_MAX;
  patience      = lrt->opts->cgwindow;
  growth        = lrt->opts->cgdecay;
  give_up_above = FLT_MAX;

  /* Scale cgeps by the initial residual norm. */
  resnorm = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r));
  if (Verbosity >= 2) printf( " CGINITIAL RSQR: %g\n", resnorm);
  tol = unscaled_cgeps * resnorm;

  /* Slot 0 of the history is the starting position. */
  step = 0;
  dyv_set( resnorm_hist, step, resnorm);
  dyv_array_set( x_hist, step, conjgrad_x_ref( cg));
  step += 1;

  /* Stop iterating once the residual drops below the tolerance. */
  while (resnorm >= tol) {
    if (Verbosity > 3) {
      fprintf_oneline_dyv( stdout, " CG POS:", cg->cgs->x, "\n");
    }

    /* Non-epsilon termination conditions. */
    if (step >= maxiters || patience <= 0 || resnorm > give_up_above) break;

    /* One CG step, then the Euclidean norm of the new residual. */
    cgiter( cg);
    resnorm = dyv_magnitude( conjgrad_r_ref( cg));

    /* Record this step. */
    dyv_set( resnorm_hist, step, resnorm);
    dyv_array_set( x_hist, step, conjgrad_x_ref( cg));
    if (Verbosity >= 2) printf( " CGEPS RSQR: %g\n", resnorm);

    /* Track the best residual; any non-improving step uses up patience. */
    if (resnorm <= best_resnorm) {
      best_resnorm  = resnorm;
      patience      = lrt->opts->cgwindow;
      give_up_above = growth * best_resnorm;
    }
    else {
      patience -= 1;
    }

    step += 1;
  }

  /* Pick the recorded position with the smallest residual, rather than the
     position of the last iteration. */
  best_step = dyv_argmin( resnorm_hist);
  best_x = dyv_array_ref( x_hist, best_step);
  if (best_x == NULL) {
    my_errorf( "mk_lr_cgresult_cgeps: NULL param vec %d", best_step);
  }

  if (Verbosity >= 2) {
    /* This reports the residual of cg's current state. */
    resnorm = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r));
    printf( " CGFINAL RSQR: %g\n", resnorm);
  }

  answer = mk_copy_dyv( best_x);
  free_dyv_array( x_hist);
  free_dyv( resnorm_hist);

  return answer;
}
/* Exactly one of X and ds should be NULL. */ lr_train *mk_lr_train( spardat *X, dym *factors, dyv *outputs, dyv *initb, lr_options *opts) { /* initb is copied into lr->b. */ int converge, rc; int numiters, bestiter; double dev, olddev; dyv *devhist; lr_train *lrt; lr_state *bestlrs; lr_statearr *lrsarr; /* Create lr_train struct. */ if (X != NULL) lrt = mk_lr_train_from_spardat( X, opts); else lrt = mk_lr_train_from_dym( factors, outputs, opts); /* Set initial value of model parameters, if desired. */ if (initb != NULL) lr_train_overwrite_b( lrt, initb); /* Initialize our loop state */ dev = -1000.0; lrsarr = mk_array_of_null_lr_states( opts->lrmax); devhist = mk_constant_dyv( opts->lrmax, FLT_MAX); /* START OF IRLS ITERATIONS */ /* Iterate until the change in deviance is relatively small. */ for (numiters=0; numiters < opts->lrmax; ++numiters) { /* Update olddev and iterate. */ olddev = dev; rc = lr_train_iterate(lrt); /* Test for convergence. */ lr_statearr_set( lrsarr, numiters, lrt->lrs); converge = lr_deviance_test( lrt, opts->lreps, olddev, &dev); dyv_set( devhist, numiters, dev); /* Print stuff. */ if (Verbosity >= 1) printf( "."); if (Verbosity >= 3) { printf( "LR ITER %d: likesat: %g, likelihood: %g, deviance: %g\n", numiters, lrt->likesat, lr_log_likelihood_from_deviance( dev, lrt->likesat), dev); } if (Verbosity >= 5) { /* Print all or most extreme attributes. */ printf( " Params, b0: %g\n", lrt->lrs->b0); fprintf_oneline_dyv( stdout, " Params, b:", lrt->lrs->b, "\n"); } if (converge) break; else if (rc == -2) break; /* Exceeded cgmax. */ else if (am_isnan(dev)) break; } /* END OF ITERATIONS */ /* Check state history for best holdout performance. 
*/ bestiter = dyv_argmin( devhist); bestlrs = lr_statearr_ref( lrsarr, bestiter); free_lr_state( lrt->lrs); lrt->lrs = mk_copy_lr_state( bestlrs); if (converge) lrt->lrs->converged = converge; if (Verbosity == 1) printf( "\n"); if (Verbosity >= 2) { printf( "CHOOSING ITERATION %d WITH DEVIANCE %g\n", bestiter, dyv_ref( devhist, bestiter)); } if (Verbosity >= 2) { fprintf_oneline_dyv( stdout, " devhist:", devhist, "\n"); } /* Free state history. */ free_lr_statearr( lrsarr); free_dyv( devhist); /* Done. */ return lrt; }
dyv *mk_lr_cgresult_cgdeveps( lr_train *lrt, double cgdeveps, int maxiters,
                              conjgrad *cg)
{
  /* Runs conjugate gradient on cg using a deviance-based stopping rule and
     returns a copy of the parameter vector whose recorded deviance was
     smallest.

     Iteration stops when
       - the residual norm is no longer above 1e-300,
       - maxiters CG steps have been taken (iters > maxiters),
       - opts->cgwindow consecutive iterations failed to match or beat the
         best deviance seen in the loop,
       - the deviance exceeds opts->cgdecay times that best deviance, or
       - the absolute change in deviance is below dev*cgdeveps.

     The history holds the starting position in slot 0 plus one slot per CG
     step, i.e. up to maxiters+1 entries, and is allocated accordingly.

     The caller owns the returned dyv. */
  int iters, bestiter, window;
  double dev, olddev, bestdev, rsqr, decay, decthresh;
  dyv *devhist, *x, *result;
  dyv_array *paramhist;

  /* History buffers.  The loop below may write slot `maxiters` (its limit
     check is strict), so maxiters+1 slots are required; allocating only
     maxiters slots would let the last step write past the end. */
  devhist   = mk_constant_dyv( maxiters+1, FLT_MAX);
  paramhist = mk_array_of_null_dyvs( maxiters+1);
  bestdev   = FLT_MAX;
  window    = lrt->opts->cgwindow;
  decay     = lrt->opts->cgdecay;
  decthresh = FLT_MAX;

  /* Initial residual norm, used only for the while-loop condition. */
  rsqr = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r));

  /* Store initial position in history. */
  iters = 0;
  dev = lr_deviance_from_cg( lrt, cg);
  dyv_set( devhist, iters, dev);
  dyv_array_set( paramhist, iters, conjgrad_x_ref( cg));
  iters += 1;

  /* Abort the iters if rsqr gets too small for calcs to proceed. */
  while (rsqr > 1e-300) {

    if (Verbosity > 3) {
      fprintf_oneline_dyv( stdout, " CG POS:", cg->cgs->x, "\n");
    }

    /* Non-deviance termination criteria. */
    if (iters > maxiters) break; /* Strict, since we start with iters=1. */
    if (window <= 0) break;
    if (dev > decthresh) break;

    /* Iterate. */
    olddev = dev;
    cgiter( cg);

    /* Deviance at the new position. */
    dev = lr_deviance_from_cg( lrt, cg);
    if (dev <= bestdev) {
      bestdev = dev;
      window = lrt->opts->cgwindow;
      decthresh = decay * bestdev;
    }
    else window -= 1;

    /* Store history. */
    dyv_set( devhist, iters, dev);
    dyv_array_set( paramhist, iters, conjgrad_x_ref( cg) /* good params */);
    if (Verbosity >= 2) printf( "CG DEVIANCE: %g\n", dev);

    /* Terminate on rel diff of deviance. */
    if (fabs(olddev-dev) < dev*cgdeveps) break;

    /* Count number of iters. */
    iters += 1;

    /* We must calculate rsqr for the while-loop condition. */
    rsqr = dyv_magnitude( conjgrad_r_ref( cg));
  }

  /* Select parameters: get best params from paramhist. */
  bestiter = dyv_argmin( devhist);
  x = dyv_array_ref( paramhist, bestiter);
  if (x == NULL) {
    my_errorf( "mk_lr_cgresult_cgdeveps: NULL param vec %d", bestiter);
  }

  result = mk_copy_dyv( x);
  free_dyv_array( paramhist);
  free_dyv( devhist);

  return result;
}