Esempio n. 1
0
dyv *mk_lr_XtWXv_dyv( const lr_train *lrt, const dyv *v)
{
  /* Returns a newly made dyv holding
                 [1t]
                 [--] W [1|X] v
                 [Xt]
     where X is lrt->X when that is non-NULL and lrt->M otherwise, and the
     weights come from lrt_w_ref(lrt).  v[0] pairs with the column of ones;
     v[1:] pairs with the columns of X. */
  const int use_sparse = (lrt->X != NULL);
  double lead, ones_dot;
  dyv *tail, *acc, *out;

  /* v = ( lead, tail ). */
  lead = dyv_ref( v, 0);
  tail = mk_dyv_slice( v, 1, dyv_size( v));

  /* acc = X tail + lead, i.e. [1|X] v. */
  if (use_sparse) {
    acc = mk_spardat_times_dyv( lrt->X, tail);
  }
  else {
    acc = mk_dym_times_dyv( lrt->M, tail);
  }
  free_dyv( tail);
  dyv_scalar_add( acc, lead, acc);

  /* acc = W [1|X] v, computed in place. */
  dyv_mult( acc, lrt_w_ref(lrt), acc);

  /* out = Xt acc; afterwards 1t acc (the sum of acc) goes in at index 0. */
  if (use_sparse) {
    out = mk_spardat_transpose_times_dyv( lrt->X, acc);
  }
  else {
    out = mk_dym_transpose_times_dyv( lrt->M, acc);
  }
  ones_dot = dyv_sum( acc);
  dyv_insert( out, 0, ones_dot);

  free_dyv( acc);
  return out;
}
Esempio n. 2
0
void lr_cg_multA( const dyv *v, dyv *result, void *userdata)
{
  /* Matrix-vector callback: writes A v into result.  A v is the product
     returned by mk_lr_XtWXv_dyv(), plus rrlambda * v (with element 0 zeroed)
     when lrt->opts->rrlambda is positive.  userdata is cast to lr_train *. */
  lr_train *lrt = (lr_train *) userdata;
  double ridge;
  dyv *XtWXv, *penalty;

  /* Do sparse matrix-vector multiply. */
  XtWXv = mk_lr_XtWXv_dyv( lrt, v);

  ridge = lrt->opts->rrlambda;
  if (ridge > 0.0) {
    /* Ridge Regression term; the constant term is not penalized. */
    penalty = mk_dyv_scalar_mult( v, ridge);
    dyv_set( penalty, 0, 0.0);
    dyv_plus( XtWXv, penalty, result);
    free_dyv( penalty);
  }
  else {
    /* No Ridge Regression: the plain product is the answer. */
    copy_dyv( XtWXv, result);
  }

  free_dyv( XtWXv);
}
Esempio n. 3
0
double lr_deviance_from_cg( lr_train *lrt, conjgrad *cg)
{
  /* Deviance obtained by treating the current CG position
     conjgrad_x_ref(cg) as the parameter vector ( b0, b ). */
  const int nrows = lrt->numrows;
  double b0, loglike;
  dyv *params, *nvec, *uvec;

  /* Split a private copy of the CG position into b0 and b. */
  params = mk_copy_dyv( conjgrad_x_ref( cg));
  b0 = dyv_ref( params, 0);
  dyv_remove( params, 0);

  /* n from the data and parameters. */
  nvec = mk_dyv( nrows);
  if (lrt->X != NULL) {
    lr_compute_n_from_spardat( lrt->X, b0, params, nvec);
  }
  else {
    lr_compute_n_from_dym( lrt->M, b0, params, nvec);
  }
  free_dyv( params);

  /* u from n. */
  uvec = mk_dyv( nrows);
  lr_compute_u_from_n( nvec, uvec);
  free_dyv( nvec);

  /* Likelihood, then deviance. */
  loglike = lr_log_likelihood_basic( lrt->y, uvec);
  free_dyv( uvec);

  return lr_deviance_from_log_likelihood( loglike, lrt->likesat);
}
Esempio n. 4
0
/* 
   PRE: size of actual_dist is same as size of hypothesized_dist.
        Any entry in which hypothesized_dist has a value of
        zero must have an actual_dist value of zero, i.e.
          forall i, hy_dist[i]==0 => ac_dist[i] == 0

   Given two distributions represented as histograms 
   (actual_dist and hypothesized_dist), how much evidence is there that they
   are from the same distribution? 
   Note that these things must be counts. Each element of actual_dist must
   be an integer. Each element of hypothesized_dist may be non-integer
   because we're talking expected counts there.

   The prob returned by this function answers that question
   using a standard chi-squared test. If it is low (e.g. < 0.05), then it is
   unlikely that they are the same. 

   The "dof" parameter is the "Degrees Of Freedom" of the test.
   
       If it is possible for any entry in the dist
       to take any value, then set dof==size.

       If the sum of values is constrained to a certain value
       then set dof==size-1.

       If there are more constraints than that, then subtract
       more from size.

   Entries whose hypothesized count is exactly zero are dropped before the
   test is run, and dof is reduced by one for every entry dropped this way
   (the caller expressed dof relative to the full size).  After that
   adjustment dof is clamped to be at least 2, as in the original code.

   A negative hypothesized count, or a positive actual count paired with a
   zero hypothesized count, is reported through my_error().
*/
double chi_squared_prob(dyv *actual_dist,dyv *hypothesized_dist,int dof)
{
  double result = -1.0;
  double min_hyp_dist = dyv_min(hypothesized_dist);
  if ( min_hyp_dist < 0.0 )
    my_error("chi_squared_prob: -ve count in hypothesized_dist");
  else if ( min_hyp_dist > 0.0 )
    result = chi_squared_prob_helper(actual_dist,hypothesized_dist,dof);
  else
  {
    /* At least one hypothesized count is zero: test only the other bins. */
    dyv *copy_ad = mk_dyv(0);
    dyv *copy_hd = mk_dyv(0);
    int i;
    for ( i = 0 ; i < dyv_size(actual_dist) ; i++ )
    {
      if ( dyv_ref(hypothesized_dist,i) > 0.0 )
      {
        add_to_dyv(copy_ad,dyv_ref(actual_dist,i));
        add_to_dyv(copy_hd,dyv_ref(hypothesized_dist,i));
      }
      else
      {
        if ( dyv_ref(actual_dist,i) > 0.0 )
          my_error("chi_squared_prob: actual_dist value must be zero if hyp dist value is zero");
        /* This bin is removed from the test, so it no longer contributes
           a degree of freedom. */
        dof -= 1;
      }
    }
    dof = int_max(2,dof);
    result = chi_squared_prob_helper(copy_ad,copy_hd,dof);
    free_dyv(copy_ad);
    free_dyv(copy_hd);
  }

  return result;
}
Esempio n. 5
0
void free_lr_state( lr_state *lrs)
{
  /* Releases lrs together with whichever of its dyv members b, n, u, w, z
     are non-NULL.  A NULL lrs is ignored. */
  if (lrs == NULL) return;

  if (lrs->b != NULL) { free_dyv( lrs->b); }
  if (lrs->n != NULL) { free_dyv( lrs->n); }
  if (lrs->u != NULL) { free_dyv( lrs->u); }
  if (lrs->w != NULL) { free_dyv( lrs->w); }
  if (lrs->z != NULL) { free_dyv( lrs->z); }

  AM_FREE( lrs, lr_state);
}
Esempio n. 6
0
int lr_train_update_b( lr_train *lrt)
{
  /* X,w,z -> b */
  /*
                   [1t]                [1t]
    Compute b = (( [--] W [1|X])^-1) * [--] W z, where W = diag(w),
                   [Xt]                [Xt]
    via the conjugate gradient helper, and store the answer back into the
    training state as ( b0, b ).
    Returns -2 when the reported CG iteration count exceeds opts->cgmax,
    and 1 otherwise.
  */
  const int nparams = lrt->numatts;
  int k, cg_iters;
  double eps, deveps;
  dyv *solution, *start = NULL;

  /* cgeps is handed over unscaled; scaling by the initial CG residual is
     left to mk_lr_cgresult(). */
  eps    = lrt->opts->cgeps;
  deveps = lrt->opts->cgdeveps;

  /* Optional warm start: pack the current ( b0, b ) into one vector. */
  if (lrt->opts->cgbinit) {
    start = mk_dyv( nparams);
    dyv_set( start, 0, lrt_b0_ref(lrt));
    for (k = 1; k < nparams; ++k) {
      dyv_set( start, k, dyv_ref( lrt_b_ref(lrt), k-1));
    }
  }

  solution = mk_lr_update_b_conjugate_gradient_helper( lrt, eps, deveps,
                                                       lrt->opts->cgmax,
                                                       &cg_iters, start);

  if (start != NULL) free_dyv( start);

  /* Unpack the solution: element 0 is b0, the rest is b. */
  lrt_b0_set(lrt, dyv_ref( solution, 0));
  for (k = 1; k < nparams; ++k) {
    dyv_set( lrt_b_ref(lrt), k-1, dyv_ref( solution, k));
  }

  free_dyv( solution);

  /* Hitting cgmax is considered a failure. */
  return (cg_iters > lrt->opts->cgmax) ? -2 : 1;
}
Esempio n. 7
0
dyv *mk_lr_XtWz_dyv( const lr_train *lrt)
{
  /*
                      [1t]
    Returns a new dyv [--] W z, with w and z read through lrt_w_ref() and
                      [Xt]
    lrt_z_ref(), and X being lrt->X when non-NULL, otherwise lrt->M.
  */
  dyv *weighted, *out;

  /* weighted = w * z, elementwise. */
  weighted = mk_dyv_mult( lrt_w_ref(lrt), lrt_z_ref(lrt));

  /* out = Xt weighted. */
  if (lrt->X != NULL) {
    out = mk_spardat_transpose_times_dyv( lrt->X, weighted);
  }
  else {
    out = mk_dym_transpose_times_dyv( lrt->M, weighted);
  }

  /* 1t weighted is the plain sum; it goes in front of Xt weighted. */
  dyv_insert( out, 0, dyv_sum( weighted));
  free_dyv( weighted);

  return out;
}
Esempio n. 8
0
lr_predict *mk_in_lr_predict( PFILE *f)
{
  /* Reads one dyv from f and builds an lr_predict from it: element 0
     becomes b0, the remaining elements become the dyv b. */
  int k, nparams;
  dyv *packed, *coefs;
  lr_predict *lrp;

  lrp = AM_MALLOC( lr_predict);

  packed  = mk_dyv_read( f);
  nparams = dyv_size( packed);

  lrp->b0 = dyv_ref( packed, 0);

  /* Everything after element 0, shifted down by one position. */
  coefs = mk_dyv( nparams-1);
  for (k = 0; k < nparams-1; ++k) {
    dyv_set( coefs, k, dyv_ref( packed, k+1));
  }
  lrp->b = coefs;

  free_dyv( packed);
  return lrp;
}
Esempio n. 9
0
/* Sum of the entries in row "row" of x, computed on a temporary copy of
   that row. */
double dym_sum_row(dym *x,int row)
{
  double total;
  dyv *row_copy;

  row_copy = mk_dyv_from_dym_row(x,row);
  total = dyv_sum(row_copy);
  free_dyv(row_copy);

  return total;
}
Esempio n. 10
0
void free_lr_predict( lr_predict *lrp)
{
  /* Releases lrp and, when present, its coefficient dyv b.
     A NULL lrp is ignored. */
  if (lrp == NULL) return;

  if (lrp->b != NULL) { free_dyv( lrp->b); }
  AM_FREE( lrp, lr_predict);
}
Esempio n. 11
0
/* Evaluates the Gaussian described by data (cast to gauss_info *) at the
   two-element point (x,z), using gauss_eval(). */
double gauss_height_fn(char *data,double x,double z)
{
  gauss_info *gi;
  dyv *point;
  double height;

  gi = (gauss_info *) data;
  point = mk_dyv_2(x,z);
  height = gauss_eval(point,gi->mu,gi->cov_inv,gi->cov_determinant);
  free_dyv(point);

  return height;
}
Esempio n. 12
0
double lr_deviance_from_dym_b( const dym *M, dyv *y, double b0, dyv *b)
{
  /* Deviance of outputs y for the dense data M under parameters ( b0, b ):
     n is computed from M, b0 and b; u from n; the deviance from y and u. */
  const int nrows = dym_rows( M);
  dyv *nvec = mk_dyv( nrows);
  dyv *uvec = mk_dyv( nrows);
  double deviance;

  lr_compute_n_from_dym( M, b0, b, nvec);
  lr_compute_u_from_n( nvec, uvec);
  deviance = lr_deviance_basic( y, uvec);

  free_dyv( uvec);
  free_dyv( nvec);
  return deviance;
}
Esempio n. 13
0
/* Density at x of a Gaussian with mean mu, inverse covariance cov_inv and
   covariance determinant covdet:

       exp( -(x-mu)^t cov_inv (x-mu) / 2 ) / sqrt( (2*PI)^d * covdet )

   where d = dyv_size(x).  The normalising constant depends on the
   dimension; the earlier form 1/sqrt(2*PI*covdet) is the d==1 special case
   and gives the same value as this code when d is 1. */
double gauss_eval(dyv *x,dyv *mu,dym *cov_inv,double covdet)
{
  int dims = dyv_size(x);
  dyv *x_minus_mu = mk_dyv_subtract(x,mu);
  double quad_form = dym_xt_a_x_value(x_minus_mu,cov_inv);
  double multiplicand = 1 / sqrt(pow(2 * PI,(double) dims) * covdet);
  double result = multiplicand * exp(-quad_form / 2);
  free_dyv(x_minus_mu);
  return result;
}  
Esempio n. 14
0
double lr_deviance_from_spardat_b( const spardat *X, dyv *y, double b0,
                                   dyv *b)
{
  /* Deviance of outputs y for the sparse data X under parameters ( b0, b ):
     n is computed from X, b0 and b; u from n; the deviance from y and u. */
  const int nrows = spardat_num_rows( X);
  dyv *nvec = mk_dyv( nrows);
  dyv *uvec = mk_dyv( nrows);
  double deviance;

  lr_compute_n_from_spardat( X, b0, b, nvec);
  lr_compute_u_from_n( nvec, uvec);
  deviance = lr_deviance_basic( y, uvec);

  free_dyv( uvec);
  free_dyv( nvec);
  return deviance;
}
Esempio n. 15
0
void free_lr_train( lr_train *lrt)
{
  /* Releases lrt and those of its members lrs, y and opts that are
     non-NULL.  A NULL lrt is ignored. */
  if (lrt == NULL) return;

  if (lrt->lrs != NULL)  { free_lr_state( lrt->lrs); }
  if (lrt->y != NULL)    { free_dyv( lrt->y); }
  if (lrt->opts != NULL) { free_lr_options( lrt->opts); }

  AM_FREE(lrt, lr_train);
}
Esempio n. 16
0
/* Driver: obtains mu, cov, lo and hi through the *_from_args calls (each is
   given a built-in value, presumably used as the fallback when the argument
   is absent), draws the 2-d Gaussian, releases everything, waits for a key
   and prints the allocation report. */
void gauss_main(int argc,char *argv[])
{
  dyv *dmu, *dlo, *dhi;
  dym *dcov;
  dyv *mu, *lo, *hi;
  dym *cov;

  /* Built-in values handed to the *_from_args calls. */
  dmu  = mk_dyv_2(1.0,2.0);
  dcov = mk_dym_22(1.0,0.0,0.0,3.0);
  dlo  = mk_dyv_2(-4.0,-4.0);
  dhi  = mk_dyv_2(4.0,4.0);

  /* Values actually used for drawing. */
  mu  = mk_dyv_from_args("mu",argc,argv,dmu);
  cov = mk_dym_from_args("cov",argc,argv,dcov);
  lo  = mk_dyv_from_args("lo",argc,argv,dlo);
  hi  = mk_dyv_from_args("hi",argc,argv,dhi);

  draw_2d_gaussian(mu,cov,lo,hi);

  free_dyv(dmu);
  free_dym(dcov);
  free_dyv(dlo);
  free_dyv(dhi);

  free_dyv(mu);
  free_dym(cov);
  free_dyv(lo);
  free_dyv(hi);

  wait_for_key();
  am_malloc_report();
}
Esempio n. 17
0
/* Makes a dym consisting of a subset of the rows in x. The members of
   the subset are those rows mentioned in "rows", taken in that order.
   Result will thus have "ivec_size(rows)" rows and dym_cols(x) columns. */
dym *mk_dym_from_subset_of_rows(dym *x,ivec *rows)
{
  int count = ivec_size(rows);
  dym *subset = mk_dym(count,dym_cols(x));
  int dest;

  for ( dest = 0 ; dest < count ; dest++ )
  {
    /* Row dest of the result is a copy of row rows[dest] of x. */
    dyv *row_copy = mk_dyv_from_dym_row(x,ivec_ref(rows,dest));
    copy_dyv_to_dym_row(row_copy,subset,dest);
    free_dyv(row_copy);
  }

  return subset;
}
Esempio n. 18
0
dyv *mk_lr_update_b_conjugate_gradient_helper( lr_train *lrt, double cgeps,
                                               double cgdeveps,
                                               int maxiters, int *iters,
                                               dyv *initx)
{
  /* Sets up a conjugate gradient problem whose right-hand side is
     mk_lr_XtWz_dyv(lrt) and whose matrix-vector product is lr_cg_multA,
     runs it through mk_lr_cgresult(), and returns the resulting dyv.
     *iters receives cg->cgs->iterations. */
  const int nparams = lrt->numatts;
  dyv *rhs, *solution;
  cgopts *options;
  conjgrad *solver;

  rhs = mk_lr_XtWz_dyv( lrt);
  options = mk_cgopts_qspd( nparams, maxiters, -1.0 /* eps for runcg */,
                            rhs, initx,
                            (void *) lrt,
                            lr_cg_mk_copy_userdata,
                            lr_cg_free_userdata,
                            lr_cg_multA);
  free_dyv( rhs);

  /* Preconditioning is not enabled here. */

  /* Build the solver from the options, which are then no longer needed. */
  solver = mk_conjgrad_from_cgopts( options);
  free_cgopts( options);

  /* Fine-grained run: iteration is driven from mk_lr_cgresult() rather
     than by a single coarse-grained runcg() call. */
  solution = mk_lr_cgresult( lrt, cgeps, cgdeveps, maxiters, solver);

  /* Report iteration information. */
  *iters = solver->cgs->iterations;
  if (Verbosity >= 3) {
    printf( "CG iterations=%d\n", solver->cgs->iterations);
  }

  free_conjgrad( solver);
  return solution;
}
Esempio n. 19
0
void out_lr_predict( PFILE *f, lr_predict *lrp)
{
  /* Writes lrp to f as a single dyv: b0 first, followed by the elements
     of b. */
  const int nb = dyv_size( lrp->b);
  int k;
  dyv *packed;

  packed = mk_dyv( nb + 1);
  dyv_set( packed, 0, lrp->b0);
  for (k = 0; k < nb; ++k) {
    dyv_set( packed, k+1, dyv_ref( lrp->b, k));
  }

  dyv_write( f, packed);
  free_dyv( packed);
}
Esempio n. 20
0
/* Exactly one of X and factors should be NULL. */
lr_train *mk_lr_train( spardat *X, dym *factors, dyv *outputs,
                       dyv *initb, lr_options *opts)
{
  /* Builds an lr_train from X (when non-NULL) or from factors/outputs, runs
     up to opts->lrmax calls of lr_train_iterate(), and leaves in lrt->lrs a
     copy of the state recorded at the iteration with the smallest deviance.
     Iteration stops early on convergence, when lr_train_iterate() returns
     -2, or when the deviance is NaN. */
  /* initb is copied into lr->b. */
  int converge, rc;
  int numiters, bestiter;
  double dev, olddev;
  dyv *devhist;
  lr_train *lrt;
  lr_state *bestlrs;
  lr_statearr *lrsarr;

  /* Create lr_train struct. */
  if (X != NULL) lrt = mk_lr_train_from_spardat( X, opts);
  else lrt = mk_lr_train_from_dym( factors, outputs, opts);

  /* Set initial value of model parameters, if desired. */
  if (initb != NULL) lr_train_overwrite_b( lrt, initb);

  /* Initialize our loop state */
  dev = -1000.0;
  /* converge is tested after the loop; give it (and rc) a defined value in
     case the loop body never runs (opts->lrmax <= 0).
     NOTE(review): the history lookup below still assumes at least one
     iteration was recorded. */
  converge = 0;
  rc = 0;
  lrsarr = mk_array_of_null_lr_states( opts->lrmax);
  devhist = mk_constant_dyv( opts->lrmax, FLT_MAX);

  /* START OF IRLS ITERATIONS */
  /* Iterate until the change in deviance is relatively small. */
  for (numiters=0; numiters < opts->lrmax; ++numiters) {

    /* Update olddev and iterate. */
    olddev = dev;
    rc = lr_train_iterate(lrt);

    /* Record this iteration's state and deviance, then test for
       convergence. */
    lr_statearr_set( lrsarr, numiters, lrt->lrs);
    converge = lr_deviance_test( lrt, opts->lreps, olddev, &dev);
    dyv_set( devhist, numiters, dev);

    /* Print stuff. */
    if (Verbosity >= 1) printf( ".");
    if (Verbosity >= 3) {
      printf( "LR ITER %d: likesat: %g, likelihood: %g, deviance: %g\n",
              numiters, lrt->likesat,
              lr_log_likelihood_from_deviance( dev, lrt->likesat), dev);
    }
    if (Verbosity >= 5) {
      /* Print the parameters. */
      printf( "  Params, b0: %g\n", lrt->lrs->b0);
      fprintf_oneline_dyv( stdout, "  Params, b:", lrt->lrs->b, "\n");
    }

    if (converge) break;
    else if (rc == -2) break; /* Exceeded cgmax. */
    else if (am_isnan(dev)) break;
  }
  /* END OF ITERATIONS */

  /* Pick the recorded state with the smallest deviance; slots that were
     never filled keep their FLT_MAX placeholder in devhist. */
  bestiter = dyv_argmin( devhist);
  bestlrs  = lr_statearr_ref( lrsarr, bestiter);
  free_lr_state( lrt->lrs);
  lrt->lrs = mk_copy_lr_state( bestlrs);
  if (converge) lrt->lrs->converged = converge;
  if (Verbosity == 1) printf( "\n");
  if (Verbosity >= 2) {
    printf( "CHOOSING ITERATION %d WITH DEVIANCE %g\n",
            bestiter, dyv_ref( devhist, bestiter));
  }
  if (Verbosity >= 2) {
    fprintf_oneline_dyv( stdout, "  devhist:", devhist, "\n");
  }

  /* Free state history. */
  free_lr_statearr( lrsarr);
  free_dyv( devhist);

  /* Done. */
  return lrt;
}
Esempio n. 21
0
/* Releases it together with the dyv held in its "integral" field.
   A NULL it is ignored and a NULL integral field is skipped, in the same
   way the other free_* routines in this code guard their arguments. */
void free_integ(integ *it)
{
  if ( it != NULL )
  {
    if ( it->integral != NULL ) free_dyv(it->integral);
    AM_FREE(it,integ);
  }
}
Esempio n. 22
0
dyv *mk_lr_cgresult_cgeps( lr_train *lrt, double unscaled_cgeps,
                           int maxiters, conjgrad *cg)
{
  /* Drives cg one cgiter() at a time, recording the residual norm and the
     position after every step, and returns a copy of the recorded position
     whose residual norm is smallest.  Iteration stops when the norm drops
     below unscaled_cgeps times the initial norm, when maxiters history
     slots are used, when the window counter runs out, or when the norm
     exceeds cgdecay times the best norm seen. */
  int step, best_step, patience;
  double resid, best_resid, decay, tol, ceiling;
  dyv *best_x, *answer, *resid_hist;
  dyv_array *x_hist;

  /* Histories and termination bookkeeping. */
  resid_hist = mk_constant_dyv( maxiters, FLT_MAX);
  x_hist     = mk_array_of_null_dyvs( maxiters);
  best_resid = FLT_MAX;
  ceiling    = FLT_MAX;
  patience   = lrt->opts->cgwindow;
  decay      = lrt->opts->cgdecay;

  /* The tolerance is relative to the initial residual norm. */
  resid = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r));
  if (Verbosity >= 2) printf( "    CGINITIAL RSQR: %g\n", resid);
  tol = unscaled_cgeps * resid;

  /* Slot 0 of the history holds the starting position. */
  dyv_set( resid_hist, 0, resid);
  dyv_array_set( x_hist, 0, conjgrad_x_ref( cg));

  /* Stop once the residual norm falls below the tolerance. */
  for (step = 1; resid >= tol; ++step) {
    if (Verbosity > 3) {
      fprintf_oneline_dyv( stdout, "    CG POS:", cg->cgs->x, "\n");
    }

    /* Non-epsilon termination conditions. */
    if (step >= maxiters || patience <= 0 || resid > ceiling) break;

    /* Advance CG and measure the Euclidean norm of the new residual. */
    cgiter( cg);
    resid = dyv_magnitude( conjgrad_r_ref( cg));

    /* Record this step. */
    dyv_set( resid_hist, step, resid);
    dyv_array_set( x_hist, step, conjgrad_x_ref( cg));
    if (Verbosity >= 2) printf( "    CGEPS RSQR: %g\n", resid);

    /* A new best refills the window and resets the divergence ceiling;
       anything else uses up one unit of the window. */
    if (resid <= best_resid) {
      best_resid = resid;
      patience   = lrt->opts->cgwindow;
      ceiling    = decay * best_resid;
    }
    else {
      patience -= 1;
    }
  }

  /* Choose the recorded position with the smallest residual norm. */
  best_step = dyv_argmin( resid_hist);
  best_x = dyv_array_ref( x_hist, best_step);
  if (best_x == NULL) {
    my_errorf( "mk_lr_cgresult_cgeps: NULL param vec %d", best_step);
  }

  if (Verbosity >= 2) {
    printf( "    CGFINAL RSQR: %g\n",
            sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r)));
  }

  answer = mk_copy_dyv( best_x);
  free_dyv_array( x_hist);
  free_dyv( resid_hist);
  return answer;
}
Esempio n. 23
0
dyv *mk_lr_cgresult_cgdeveps( lr_train *lrt, double cgdeveps,
                              int maxiters, conjgrad *cg)
{
  /* Drives cg one cgiter() at a time, recording the deviance and the
     position after every step, and returns a copy of the recorded position
     whose deviance is smallest.  Iteration stops when the relative change
     in deviance drops below cgdeveps, after maxiters steps, when the window
     counter runs out, when the deviance exceeds cgdecay times the best
     deviance seen, or when the residual norm becomes tiny. */
  int iters, bestiter, window;
  double dev, olddev, bestdev, rsqr, decay, decthresh;
  dyv *devhist, *x, *result;
  dyv_array *paramhist;

  /* The histories hold the starting position in slot 0 plus one slot for
     each of up to maxiters iterations, hence maxiters+1 entries.  Unused
     slots keep FLT_MAX / NULL. */
  devhist    = mk_constant_dyv( maxiters+1, FLT_MAX);
  paramhist  = mk_array_of_null_dyvs( maxiters+1);
  dev        = -FLT_MAX;
  bestdev    = FLT_MAX;
  window     = lrt->opts->cgwindow;
  decay      = lrt->opts->cgdecay;
  decthresh  = FLT_MAX;

  /* Initial residual norm, needed for the while-loop condition. */
  rsqr = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r));

  /* Store initial position in history. */
  iters = 0;
  dev = lr_deviance_from_cg( lrt, cg);
  dyv_set( devhist, iters, dev);
  dyv_array_set( paramhist, iters, conjgrad_x_ref( cg));
  iters += 1;

  /* Abort the iters if rsqr gets too small for calcs to proceed. */
  while (rsqr > 1e-300) {
    if (Verbosity > 3) {
      fprintf_oneline_dyv( stdout, "    CG POS:", cg->cgs->x, "\n");
    }

    /* Non-deviance termination criteria. */
    if (iters > maxiters) break; /* Strict, since we start with iters=1. */
    if (window <= 0) break;
    if (dev > decthresh) break;

    /* Iterate. */
    olddev  = dev;
    cgiter( cg);

    /* A new best deviance refills the window and resets the divergence
       threshold; anything else uses up one unit of the window. */
    dev = lr_deviance_from_cg( lrt, cg);
    if (dev <= bestdev) {
      bestdev   = dev;
      window    = lrt->opts->cgwindow;
      decthresh = decay * bestdev;
    }
    else window -= 1;

    /* Store history.  iters can be as large as maxiters here. */
    dyv_set( devhist, iters, dev);
    dyv_array_set( paramhist, iters,
                   conjgrad_x_ref( cg) /* good params */);
    if (Verbosity >= 2) printf( "CG DEVIANCE: %g\n", dev);

    /* Terminate on rel diff of deviance. */
    if (fabs(olddev-dev) < dev*cgdeveps) break;

    /* Count number of iters. */
    iters += 1;

    /* We must calculate rsqr for the while-loop condition. */
    rsqr = dyv_magnitude( conjgrad_r_ref( cg));
  }

  /* Select parameters. */
  /* Get best params from paramhist. */
  bestiter = dyv_argmin( devhist);
  x = dyv_array_ref( paramhist, bestiter);
  if (x == NULL) {
    my_errorf( "mk_lr_cgresult_cgdeveps: NULL param vec %d", bestiter);
  }

  result = mk_copy_dyv( x);
  free_dyv_array( paramhist);
  free_dyv( devhist);
  return result;
}