예제 #1
0
/*
   PRE: size of actual_dist is same as size of hypothesized_dist.
        Any entry in which hypothesized_dist has a value of
        zero must have an actual_dist value of zero, i.e.
          forall i, hy_dist[i]==0 => ac_dist[i] == 0

   Given two distributions represented as histograms
   (actual_dist and hypothesized_dist), how much evidence is there that they are
   from the same distribution?
   Note that these things must be counts. Each element of actual_dist must
   be an integer. Each element of hypothesized_dist may be non-integer
   because we're talking expected counts there.

   The prob returned by this function answers that question
   using a standard chi-squared test. If it is low (e.g. < 0.05), then it is
   unlikely that they are the same.

   The "dof" parameter is the mysterious "Degrees Of Freedom" that haunts
   any use of the word "Chi".

       If it is possible for any entry in the dist
       to take any value, then set dof==size.

       If the sum of values is constrained to a certain value
       then set dof==size-1.

       If there are more constraints than that, then subtract
       more from size.

   Entries whose hypothesized count is exactly zero are dropped before the
   test is run. The caller's dof is expressed relative to the full size, so
   it is reduced by one for every dropped entry, and then floored at 2 via
   int_max(2,dof) before being handed to chi_squared_prob_helper.

   A negative hypothesized count, or a non-zero actual count paired with a
   zero hypothesized count, is reported through my_error. The result
   variable starts at -1.0 and is left at that value on the negative-count
   path.
*/
double chi_squared_prob(dyv *actual_dist,dyv *hypothesized_dist,int dof)
{
  double result = -1.0;
  double min_hyp_dist = dyv_min(hypothesized_dist);
  if ( min_hyp_dist < 0.0 )
    my_error("chi_squared_prob: -ve count in hypothesized_dist");
  else if ( min_hyp_dist > 0.0 )
    result = chi_squared_prob_helper(actual_dist,hypothesized_dist,dof);
  else
  {
    /* At least one hypothesized count is zero: test only the other bins. */
    dyv *copy_ad = mk_dyv(0);
    dyv *copy_hd = mk_dyv(0);
    int i;
    for ( i = 0 ; i < dyv_size(actual_dist) ; i++ )
    {
      if ( dyv_ref(hypothesized_dist,i) > 0.0 )
      {
        add_to_dyv(copy_ad,dyv_ref(actual_dist,i));
        add_to_dyv(copy_hd,dyv_ref(hypothesized_dist,i));
      }
      else
      {
        /* This bin is removed from the test, so the histogram is one
           entry shorter and the degrees of freedom shrink with it. */
        dof -= 1;
        if ( dyv_ref(actual_dist,i) > 0.0 )
          my_error("chi_squared_prob: actual_dist value must be zero if hyp dist value is zero");
      }
    }
    dof = int_max(2,dof);
    result = chi_squared_prob_helper(copy_ad,copy_hd,dof);
    free_dyv(copy_ad);
    free_dyv(copy_hd);
  }

  return result;
}
예제 #2
0
파일: lr.c 프로젝트: dineshmdh/randomjungle
/* Returns the deviance of lrt's data for the parameter vector currently
   held by cg (read through conjgrad_x_ref).  Element 0 of that vector is
   split off as b0; the remaining elements are passed on as b.  The
   caller's cg and lrt are not modified by this function itself: all work
   is done on a private copy and on temporaries that are freed here. */
double lr_deviance_from_cg( lr_train *lrt, conjgrad *cg)
{
  const int nrows = lrt->numrows;
  double b0, loglike;
  dyv *params, *n_vec, *u_vec;

  /* Work on a copy so that removing element 0 does not disturb cg. */
  params = mk_copy_dyv( conjgrad_x_ref( cg));
  b0 = dyv_ref( params, 0);
  dyv_remove( params, 0);

  n_vec = mk_dyv( nrows);
  u_vec = mk_dyv( nrows);

  /* n comes from whichever data representation lrt carries: the sparse
     X when it is present, otherwise the dense M. */
  if (lrt->X == NULL) {
    lr_compute_n_from_dym( lrt->M, b0, params, n_vec);
  }
  else {
    lr_compute_n_from_spardat( lrt->X, b0, params, n_vec);
  }
  free_dyv( params);

  /* u is derived from n, after which n is no longer needed. */
  lr_compute_u_from_n( n_vec, u_vec);
  free_dyv( n_vec);

  loglike = lr_log_likelihood_basic( lrt->y, u_vec);
  free_dyv( u_vec);

  return lr_deviance_from_log_likelihood( loglike, lrt->likesat);
}
예제 #3
0
파일: lr.c 프로젝트: dineshmdh/randomjungle
/* Builds an lr_predict from stream f.  One dyv is read with mk_dyv_read;
   its element 0 becomes lrp->b0 and elements 1..size-1 are copied, in
   order, into a new dyv stored as lrp->b.  The temporary dyv is freed
   before returning.
   NOTE(review): element 0 is read unconditionally, so this assumes the
   stored vector holds at least one value -- confirm against the writer. */
lr_predict *mk_in_lr_predict( PFILE *f)
{
  lr_predict *model = AM_MALLOC( lr_predict);
  dyv *packed = mk_dyv_read( f);
  int nvals = dyv_size( packed);
  dyv *coefs;
  int k;

  model->b0 = dyv_ref( packed, 0);

  /* Everything after the first element is shifted down by one slot. */
  coefs = mk_dyv( nvals-1);
  for (k=0; k<nvals-1; ++k) {
    dyv_set( coefs, k, dyv_ref( packed, k+1));
  }
  model->b = coefs;

  free_dyv( packed);
  return model;
}
예제 #4
0
파일: lr.c 프로젝트: dineshmdh/randomjungle
/* Returns lr_deviance_basic( y, u) for the dense matrix M and the
   parameters (b0, b).  Two temporaries with one entry per row of M are
   used: n is filled by lr_compute_n_from_dym and u by
   lr_compute_u_from_n.  Both temporaries are freed before returning. */
double lr_deviance_from_dym_b( const dym *M, dyv *y, double b0, dyv *b)
{
  const int nrows = dym_rows( M);
  dyv *n_vec = mk_dyv( nrows);
  dyv *u_vec = mk_dyv( nrows);
  double deviance;

  lr_compute_n_from_dym( M, b0, b, n_vec);
  lr_compute_u_from_n( n_vec, u_vec);
  deviance = lr_deviance_basic( y, u_vec);

  free_dyv( n_vec);
  free_dyv( u_vec);

  return deviance;
}
예제 #5
0
파일: lr.c 프로젝트: dineshmdh/randomjungle
/* Returns lr_deviance_basic( y, u) for the sparse dataset X and the
   parameters (b0, b).  Two temporaries with one entry per row of X are
   used: n is filled by lr_compute_n_from_spardat and u by
   lr_compute_u_from_n.  Both temporaries are freed before returning. */
double lr_deviance_from_spardat_b( const spardat *X, dyv *y, double b0,
                                   dyv *b)
{
  const int nrows = spardat_num_rows( X);
  dyv *n_vec = mk_dyv( nrows);
  dyv *u_vec = mk_dyv( nrows);
  double deviance;

  lr_compute_n_from_spardat( X, b0, b, n_vec);
  lr_compute_u_from_n( n_vec, u_vec);
  deviance = lr_deviance_basic( y, u_vec);

  free_dyv( n_vec);
  free_dyv( u_vec);

  return deviance;
}
예제 #6
0
파일: lr.c 프로젝트: dineshmdh/randomjungle
/* X,w,z -> b

                   [1t]                [1t]
    Compute b = (( [--] W [1|X])^-1) * [--] W z, where W = diag(w).
                   [Xt]                [Xt]

   The solve is delegated to mk_lr_update_b_conjugate_gradient_helper; its
   result is unpacked into lrt as (b0, b): element 0 goes to b0 and element
   i (i >= 1) goes to b[i-1].

   Returns 1 normally, or -2 when the helper reports more iterations than
   lrt->opts->cgmax.  lrt is updated from the result in both cases. */
int lr_train_update_b( lr_train *lrt)
{
  const int natts = lrt->numatts;
  /* cgeps is passed through unscaled; scaling by the initial CG residual
     is expected to happen inside the CG code rather than here. */
  const double eps = lrt->opts->cgeps;
  const double deveps = lrt->opts->cgdeveps;
  dyv *start = NULL;
  dyv *solution;
  int k, cg_iters;

  /* Optional starting point: the current (b0, b) packed into one vector. */
  if (lrt->opts->cgbinit) {
    start = mk_dyv( natts);
    dyv_set( start, 0, lrt_b0_ref(lrt));
    for (k=0; k<natts-1; ++k) {
      dyv_set( start, k+1, dyv_ref( lrt_b_ref(lrt), k));
    }
  }

  solution = mk_lr_update_b_conjugate_gradient_helper( lrt, eps, deveps,
                                                       lrt->opts->cgmax,
                                                       &cg_iters, start);

  if (start != NULL) free_dyv( start);

  /* Unpack the solution back into lrt. */
  lrt_b0_set(lrt, dyv_ref( solution, 0));
  for (k=0; k<natts-1; ++k) {
    dyv_set( lrt_b_ref(lrt), k, dyv_ref( solution, k+1));
  }

  free_dyv( solution);

  /* Exceeding cgmax is considered a failure. */
  return (cg_iters > lrt->opts->cgmax) ? -2 : 1;
}
예제 #7
0
integ *mk_integ(
    double (*h)(double parameter,double constant,double x),
    double xlo,
    double xhi,
    double parameter,
    double constant,
    int size
  )
/*
   Returns an it in which, with delta = (xhi - xlo) / (size-1),

   it->integral[i] = integral_from_xlo_to(xlo + delta*i) of h(parameter,constant,x) dx
                     ------------------------------------------------------------
                     integral_from_xlo_to_xhi of h(parameter,constant,x) dx

   so it->integral[0] == 0 and it->integral[size-1] == 1.

   The integral is accumulated numerically over size-1 equal steps. The
   first step adds delta * h(xlo + delta) (h is never evaluated at xlo);
   every later step adds the trapezoid delta * (h(x_i) + h(x_{i-1})) / 2.

   xlo, xhi, parameter and constant are also stored in the returned integ.

   Reported through my_error:
     - size < 2 (there would be no step to integrate over, and delta would
       be a division by zero);
     - h(parameter,constant,xhi) > 1e-6;
     - an accumulated integral of exactly zero, which cannot be normalised.
*/
{
  integ *it;
  dyv *dig;
  int i;
  double sum = 0.0;
  double last_pdf = 0.0;
  double delta;

  if ( size < 2 )
    my_error("mk_integ: size must be at least 2");

  if ( h(parameter,constant,xhi) > 1e-6 )
    my_error("Hmm... I was really hoping h(parameter,xhi) == 0");

  delta = (xhi - xlo) / (size-1);
  it = AM_MALLOC(integ);
  dig = mk_dyv(size);

  dyv_set(dig,0,0.0);

  for ( i = 1 ; i < size ; i++ )
  {
    double x = xlo + i * delta;
    double this_pdf = h(parameter,constant,x);
    if (i == 1) sum += delta * this_pdf;
    else        sum += delta * (this_pdf + last_pdf) / 2.0;
    dyv_set(dig,i,sum);
    last_pdf = this_pdf;  /* added 2/26/97  JGS */
  }

  /* Normalising by a zero total would fill the table with inf/NaN. */
  if ( sum == 0.0 )
    my_error("mk_integ: integral of h over [xlo,xhi] is zero");

  dyv_scalar_mult(dig,1.0 / sum,dig);

  it -> integral = dig;
  it -> xlo = xlo;
  it -> xhi = xhi;
  it -> parameter = parameter;
  it -> constant = constant;

  return(it);
}
예제 #8
0
/* Reads a dyv from stream f and returns it.

   Expected layout: one line holding the element count, followed by that
   many lines each holding one value.  At most 99 characters of a line are
   consumed per read.

   Problems are reported through my_errorf: end-of-file or a failed read
   before all expected lines have been consumed, a size line that does not
   start with a non-negative integer that fits in an int, or a value line
   that does not start with a number. */
dyv *mk_dyv_read( PFILE *f)
{
  int i, size, lineno;
  long parsed_size;
  double val;
  char line[101];
  char *end;
  dyv *dv;

  /* lineno is always the 1-based number of the NEXT line to be read, so
     lineno-1 lines have been read successfully so far. */
  lineno = 1;
  line[100] = '\0';

  /* Read size and make dyv. */
  if (pfeof(f)) {
    my_errorf( "mk_dyv_read: unexpected end-of-file while reading size,\n"
               "after line %d of file", lineno-1);
  }
  if (pfgets( line, 100, f) == NULL) {
    my_errorf( "mk_dyv_read: failed to read line %d from the passed stream.",
               lineno);
  }
  else lineno++;

  /* strtol, unlike atoi, lets us tell "0" apart from "no number at all".
     The int round-trip rejects counts that do not fit in an int. */
  parsed_size = strtol( line, &end, 10);
  if (end == line || parsed_size < 0
      || parsed_size != (long) (int) parsed_size) {
    my_errorf( "mk_dyv_read: invalid size on line %d of file", lineno-1);
  }
  size = (int) parsed_size;
  dv = mk_dyv( size);

  /* Read values. */
  for (i=0; i<size; ++i) {
    if (pfeof(f)) {
      /* i values have been read at this point. */
      my_errorf( "mk_dyv_read: unexpected end-of-file while reading %d vals,\n"
                 "after line %d of file (after the %dth value)",
                 size, lineno-1, i);
    }
    if (pfgets( line, 100, f) == NULL) {
      my_errorf( "mk_dyv_read: failed to read line %d from the passed stream.",
                 lineno);
    }
    else lineno++;

    val = strtod( line, &end);
    if (end == line) {
      my_errorf( "mk_dyv_read: invalid value on line %d of file", lineno-1);
    }
    dyv_set( dv, i, val);
  }

  return dv;
}
예제 #9
0
/* Creates a dyv of the given size and fills it, in order, from the
   variadic arguments that follow size.

   Warning: the compiler cannot type-check the variadic arguments.  Each
   one is fetched as a double, so callers *must* pass exactly `size`
   values of type double (e.g. 1.0, not 1). */
dyv *mk_dyv_x( int size, ...)
{
  dyv *result = mk_dyv( size);
  va_list args;
  int k = 0;

  va_start( args, size);
  while (k < size) {
    double next = va_arg( args, double);
    dyv_set( result, k, next);
    k++;
  }
  va_end( args);

  return result;
}
예제 #10
0
파일: lr.c 프로젝트: dineshmdh/randomjungle
/* Writes lrp to stream f as a single dyv: element 0 is lrp->b0 and
   elements 1..n are the n entries of lrp->b, in order.  The packed dyv is
   a temporary that is written with dyv_write and then freed. */
void out_lr_predict( PFILE *f, lr_predict *lrp)
{
  const int ncoefs = dyv_size( lrp->b);
  dyv *packed = mk_dyv( ncoefs + 1);
  int k;

  dyv_set( packed, 0, lrp->b0);
  for (k=0; k<ncoefs; ++k) {
    dyv_set( packed, k+1, dyv_ref( lrp->b, k));
  }

  dyv_write( f, packed);
  free_dyv( packed);
}