Ejemplo n.º 1
0
/*
   Draw one random deviate from a gamma distribution whose shape
   parameter is v/2.

   v == 1 : half the square of a normal() deviate
            (a chi-square with 1 df is 2 gamma(.5))
   v == 2 : gamma(1) is exponential(1), obtained as -log of a positive uniform
   other  : rejection sampling driven by the tangent of a uniform angle;
            valid for all real shape a>1 (a=v/2)
*/
double gamma ( int v )
{
   double u, dev, tangent, accept, shape_m1, spread ;

   if (v == 1) {
      dev = normal () ;
      return 0.5 * dev * dev ;
      }

   if (v == 2) {
      do {                          // Redraw until strictly positive so that
         u = unifrand () ;          // log() is never handed zero
         } while (! (u > 0.0)) ;
      return -log ( u ) ;
      }

   shape_m1 = 0.5 * v - 1.0 ;       // Shape minus one
   spread = sqrt ( v - 1.0 ) ;      // Scales the tangent-based proposal

   for (;;) {
      tangent = tan ( PI * unifrand () ) ;
      dev = spread * tangent + shape_m1 ;      // Candidate deviate
      if (! (dev <= 0.0)) {                    // Non-positive candidates are discarded
         accept = (1.0 + tangent * tangent) * exp ( shape_m1 * log(dev/shape_m1) - spread * tangent ) ;
         if (unifrand () <= accept)            // Accept with this probability
            return dev ;
         }
      }
}
Ejemplo n.º 2
0
/*
   Return one random deviate computed from two unifrand() draws as
   sqrt(-2 log u1) * cos(2 PI u2).
*/
double normal ()
{
   double u, radius ;

   do {                             // Safety: log(0) is undefined,
      u = unifrand () ;             // so redraw until the value is positive
      } while (u <= 0.0) ;

   radius = sqrt ( -2.0 * log ( u )) ;
   return radius * cos ( 2.0 * PI * unifrand () ) ;
}
Ejemplo n.º 3
0
/*
   Produce two random deviates at once from two unifrand() draws.
   The first draw gives the radius sqrt(-2 log u), the second the angle
   2 PI u; *x1 receives radius*sin(angle) and *x2 radius*cos(angle).
*/
void normal_pair ( double *x1 , double *x2 )
{
   double radius, angle ;

   do {                             // Safety: log(0) is undefined,
      radius = unifrand () ;        // so redraw until the value is positive
      } while (radius <= 0.0) ;

   radius = sqrt ( -2.0 * log ( radius )) ;
   angle = 2.0 * PI * unifrand () ;

   *x1 = radius * sin ( angle ) ;
   *x2 = radius * cos ( angle ) ;
}
Ejemplo n.º 4
0
/*
 * Generate one IPv4 address inside the trie's network.
 *
 * The host part (32 - trie->prefixlen bits) is built one bit at a time,
 * most significant first.  At each level a uniform draw is compared with
 * the current node's p: a draw below p selects the left child (bit 0),
 * otherwise the right child (bit 1).  Child nodes are created on demand
 * with p drawn from genbeta(trie->beta).
 *
 * Returns trie->netaddr OR'ed with the generated host bits, or 0 when
 * trie is NULL.
 */
uint32_t generate_addressv4(struct gentrie *trie)
{
    /* assume that if there's a non-NULL trie passed that
       it has been properly initialized with initialize_trie */
    if (!trie)
        return 0;

    struct node *currnode = trie->root;
    int remaining_bits = 32 - trie->prefixlen;
    uint32_t xaddr = 0;
    while (remaining_bits > 0)
    {
        xaddr <<= 1;
        double pprime = unifrand();
        int leftright = pprime >= currnode->p; // 0 if left, 1 if right
        xaddr |= (uint32_t)leftright;

        /* don't bother to create a node for the last bit */
        if (remaining_bits > 1)
        {
            /* if the chosen child doesn't exist yet, create it */
            if (!currnode->children[leftright])
            {
                struct node *xnode = new_node();
                xnode->p = genbeta(trie->beta);
                currnode->children[leftright] = xnode;
            }
            /* Always descend, whether the child was just created or was
               already there.  Previously the walk only advanced after
               creating a node, so an existing child was never entered and
               the parent's p was reused for the following bits. */
            currnode = currnode->children[leftright];
        }
        remaining_bits--;
    }
    return (trie->netaddr | xaddr);
}
Ejemplo n.º 5
0
/*
   Randomly remove two entries from the choices array and return them as
   parents.  Each removal overwrites the chosen slot with the last live
   entry and shortens the array by one, so selection is without replacement.
   The caller must supply at least two choices.
*/
static void pick_parents (
   int *nchoices ,  // Number of choices (returned decremented by two)
   int *choices ,   // Array (nchoices long) of candidates for parent
   int *parent1 ,   // One parent returned here
   int *parent2     // and the other here
   )
{
   int k ;

   k = (int) (unifrand() * *nchoices) ; // Select position in choices array
   if (k >= *nchoices)                  // Cheap insurance in case
      k = *nchoices - 1 ;               // unifrand() returns 1
   *parent1 = choices[k] ;              // Then return that parent
   choices[k] = choices[--*nchoices] ;  // without replacement

   k = (int) (unifrand() * *nchoices) ; // Same again for the second parent
   if (k >= *nchoices)
      k = *nchoices - 1 ;
   *parent2 = choices[k] ;
   choices[k] = choices[--*nchoices] ;
}
Ejemplo n.º 6
0
/*
   Convert expected selection frequencies into a full array of parent
   indices.  The fitness array is modified in place: integer parts are
   consumed in the first step and fractional remainders in the second.
*/
static void fitness_to_choices (
   int popsize ,      // Length of fitness, choices vectors
   float *fitness ,  // Input array of expected selection frequencies
   int *choices       // Output array of parents
   )
{
   int individual, expected, k ;

/*
   We build the choices array in two steps.  This, the first step, assigns
   parents according to the integer part of their expected frequencies.
   The loop is bounded by popsize so that rounding in the frequencies can
   never write past the end of choices, and a negative integer part is
   treated as zero instead of counting down without limit.
*/

   k = 0 ;  // Will index choices array
   for (individual=0 ; individual<popsize ; individual++) {
      expected = (int) fitness[individual] ; // Assign this many now
      fitness[individual] -= expected ;      // Save fractional remainder
      while (expected-- > 0  &&  k < popsize) // Forcibly use the int expected
         choices[k++] = individual ;         // quantity of this individual
      }

/*
   The second step is to take care of the remaining fractional expected
   frequencies.  Pass through the population, randomly selecting members
   with probability equal to their remaining fractional expectation.
   It is tempting to think that the algorithm below could loop excessively
   due to a very small fitness.  But recall that the sum of the fitnesses will
   be AT LEAST as large as the number remaining to be selected, and generally
   much more.  Thus, the ones with very small fitness (likely to cause trouble)
   will never become the only remaining possibilities.
*/

   while (k < popsize) {  // Select until choices is full
      individual = (int) (unifrand() * popsize) ; // Randomly select individual
      if (individual >= popsize)         // Cheap insurance in case
         individual = popsize - 1 ;      // unifrand() returns 1
      if (fitness[individual] > 0.0) {   // Try members still having expectation
         if (fitness[individual] >= unifrand()) { // Selects with this prob
            choices[k++] = individual ;   // Bingo!  Select this individual
            fitness[individual] -= 1.0 ;  // and make it ineligible for future
            }
         }
      }
}
Ejemplo n.º 7
0
// Pick an index in [0, n-1] by drawing x = unifrand(rval_[n-1]) and
// returning the first position i < n-1 with rval_[i] >= x, or n-1 if no
// earlier entry qualifies.
// NOTE(review): presumably rval_ holds non-decreasing cumulative values and
// unifrand(limit) draws up to that limit — confirm against the class.
int KSSingle::rvalrand(int n) {
	const int last = n - 1;
	double x = unifrand(rval_[last]);
	int i = 0;
	while (i < last && !(rval_[i] >= x)) {
		++i;
	}
	return i;
}
Ejemplo n.º 8
0
/*
--------------------------------------------------------------------------------

   mutate - apply the mutation operator to a single child

   Genes are visited from the last one down to the first.  For each gene a
   uniform draw below pmutate triggers a flip of one bit, whose position
   within the byte is chosen as longrand() % 8.

--------------------------------------------------------------------------------
*/
static void mutate (
   char *child ,   // Input/Output of the child
   int chromsize , // Number of variables in objective function
   float pmutate  // Probability of mutation
   )
{
   int gene ;

   for (gene=chromsize ; gene-- != 0 ; ) {
      if (! (unifrand() < pmutate))                // Leave this gene alone?
         continue ;
      child[gene] ^= (char) 1 << (longrand() % 8) ; // Flip random bit
      }

}
Ejemplo n.º 9
0
void E0 (
   int n ,            // Number of data points
   DataClass *data ,  // The data is here
   double *app ,      // Apparent error from testing tset
   double *excess     // Excess error (add to app to get pop)
   )
{
   int i, rep, sub, ntot, *count ;
   int m = 200 ;
   double errsum ;
   DataClass *x ;

   if (m < n)
      m = n ;

   x = (DataClass *) malloc ( n * sizeof(DataClass) ) ;  // Bootstraps here
   count = (int *) malloc ( n * sizeof(int) ) ;       // Count uses in bootstrap

   errsum = 0.0 ;
   ntot = 0 ;

   for (rep=0 ; rep<m ; rep++) {

      memset ( count , 0 , n * sizeof(int) ) ; // Zero usage counter
      for (i=0 ; i<n ; i++) {                  // Bootstrap sample same size
         sub = unifrand() * n ;                // Select this case
         if (sub >= n)                         // Cheap insurance in case
            sub = n-1 ;                        // unifrand() returns 1
         x[i] = data[sub] ;                    // Get this case
         ++count[sub] ;                        // Count its use
         }

      train ( n , x ) ;                        // Train on bootstrap sample

      for (i=0 ; i<n ; i++) {                  // Check all cases
         if (! count[i]) {                     // If not used in training
            errsum += test ( 1 , data+i ) ;    // Find its error
            ++ntot ;                           // Grand test count
            }
         }
      }

   errsum /= ntot ;                            // Mean of all tests

   train ( n , data ) ;                        // Also need the
   *app = test ( n , data ) ;                  // Apparent error
   *excess = errsum - *app ;

   free ( x ) ;
   free ( count ) ;
}
Ejemplo n.º 10
0
/*
   boot_bias_var - bootstrap estimates of the bias and variance of a statistic.

   nboot bootstrap samples of size n are drawn with replacement from data.
   user_t is evaluated on each one (third argument NULL) and the results are
   averaged.  The bias estimate is that average minus user_t evaluated on the
   original sample with the mean usage frequencies in freq as its third
   argument.  The variance estimate is the sample variance of the bootstrap
   statistics, divided by nboot-1, so nboot must be at least 2 for it to be
   meaningful.
*/
void boot_bias_var (
   int n ,              // Number of cases in sample
   double *data ,       // The sample
   double (*user_t) (int , double * , double * ) , // Compute param
   int nboot ,          // Number of bootstrap replications
   double *rawstat ,    // Raw statistic of sample, theta-hat
   double *bias ,       // Output of bias estimate
   double *var ,        // Output of variance estimate
   double *work ,       // Work area n long
   double *work2 ,      // Work area nboot long
   double *freq         // Work area n long
   )
{
   int i, rep, k ;
   double stat, mean, variance, diff, ndraws ;

   mean = 0.0 ;

   for (i=0 ; i<n ; i++)
      freq[i] = 0.0 ;

   for (rep=0 ; rep<nboot ; rep++) {    // Do all bootstrap reps (b from 1 to B)

      for (i=0 ; i<n ; i++) {           // Generate the bootstrap sample
         k = (int) (unifrand() * n) ;   // Select a case from the sample
         if (k >= n)                    // Should never happen, but be prepared
            k = n - 1 ;
         work[i] = data[k] ;            // Put bootstrap sample in work
         ++freq[k] ;                    // Tally for mean frequency
         }

      stat = user_t ( n , work , NULL ) ; // Evaluate estimator for this rep
      work2[rep] = stat ;               // Enables more accurate variance
      mean += stat ;                    // Cumulate theta-hat star dot
      }

   mean /= nboot ;
   variance = 0.0 ;
   for (rep=0 ; rep<nboot ; rep++) {    // Cumulate variance
      diff = work2[rep] - mean ;
      variance += diff * diff ;
      }

   // Total number of draws.  Formed in double because the int product
   // nboot * n can overflow for large samples and many replications.
   ndraws = (double) nboot * (double) n ;

   for (i=0 ; i<n ; i++)                // Convert tally of usage
      freq[i] /= ndraws ;               // To mean frequency of use

   memcpy ( work , data , n * sizeof(double) ) ; // user_t may reorder, so preserve
   *rawstat = user_t ( n , data , NULL) ;        // Final but biased estimate
   *bias = mean - user_t ( n , work , freq ) ;
   *var = variance / (nboot - 1) ;
}
Ejemplo n.º 11
0
void boot (
   int n ,            // Number of data points
   DataClass *data ,  // The data is here
   double *app ,      // Apparent error from testing tset
   double *excess     // Excess error (add to app to get pop)
   )
{
   int i, rep, sub, *count ;
   int m = 200 ;
   double err, errsum ;
   DataClass *x ;

   if (m < n)      // If the dataset is large
      m = n ;      // Do enough reps to be thorough

   x = (DataClass *) malloc ( n * sizeof(DataClass) ) ; // Bootstraps here
   count = (int *) malloc ( n * sizeof(int) ) ;      // Count uses in bootstrap

   errsum = 0.0 ;

   for (rep=0 ; rep<m ; rep++) {

      memset ( count , 0 , n * sizeof(int) ) ; // Zero usage counter
      for (i=0 ; i<n ; i++) {                  // Bootstrap sample same size
         sub = unifrand() * n ;                // Select this case
         if (sub >= n)                         // Cheap insurance in case
            sub = n-1 ;                        // unifrand() returns 1
         x[i] = data[sub] ;                    // Get this case
         ++count[sub] ;                        // Count its use
         }

      train ( n , x ) ;                        // Train on bootstrap sample
      for (i=0 ; i<n ; i++) {                  // Test all cases
         err = test ( 1 , data+i ) ;           // Error of this case
         errsum += err * (1 - count[i]) ;      // Bootstrap formula
         }
      }

   errsum /= (double) m  *  (double) n ;       // Grand mean

   train ( n , data ) ;                        // Also return the
   *app = test ( n , data ) ;                  // Apparent error
   *excess = errsum ;

   free ( x ) ;
   free ( count ) ;
}
Ejemplo n.º 12
0
/*
   Fill x with an n-dimensional Cauchy-style random vector.

   n == 1 : x[0] = scale * tan of a uniform angle in (-PI/2, PI/2); the angle
            is shrunk by 0.99999999, presumably to stay clear of the poles
            of tan().
   else   : rand_sphere() supplies the direction and every component is
            multiplied by a radius derived from beta(n,1).  When that value
            is not below 1 the radius falls back to the fixed value 1.e10,
            which is not multiplied by scale.
*/
void cauchy ( int n , double scale , double *x )
{
   int i ;
   double angle, b, radius ;

   if (n == 1) {
      angle = PI * unifrand () - 0.5 * PI ;
      x[0] = scale * tan ( 0.99999999 * angle ) ;
      return ;
      }

   rand_sphere ( n , x ) ;

   b = beta ( n , 1 ) ;
   radius = (b < 1.0)  ?  scale * sqrt ( b / (1.0 - b) )  :  1.e10 ;

   for (i=n ; i-- != 0 ; )
      x[i] *= radius ;
}
Ejemplo n.º 13
0
/*
   reproduce - create one child chromosome from two parents by crossover.

   Called twice per pair of parents.  On the first call (first_child nonzero)
   the within-byte split point and the crossover point are chosen at random
   and returned through split and crosspt.  On the second call those same
   values are read back and the parents swap roles, so the second child is
   the complement of the first.

   A positive *crosspt means one point crossover.  A negative *crosspt flags
   two point crossover, which is chosen only when chromsize >= 16 and a
   uniform draw is below 0.33333.

   The child is written circularly: the gene index wraps modulo chromsize,
   starting with the gene that follows position |*crosspt| - 1.

   NOTE(review): when *split is nonzero and chromsize is 1, n1 becomes -1
   before the first copy loop - confirm callers never pass chromsize < 2.
*/
static void reproduce (
   char *p1 ,        // Pointer to one parent
   char *p2 ,        // and the other
   int first_child , // Is this the first of their 2 children?
   int chromsize ,   // Number of genes in chromosome
   char *child ,     // Output of a child
   int *crosspt ,    // If first_child, output of xover pt, else input it.
   int *split        // In/out of within byte splitting point
   )

{
   int i, n1, n2, n3, n4 ;
   char left, right, *pa, *pb ;

   if (first_child) {

      *split = longrand() % 8 ; // We will split boundary bytes here
      *crosspt = 1 + unifrand() * chromsize ;  // Randomly select cross pt

      if ((chromsize >= 16)  &&  (unifrand() < 0.33333)) // Two point?
         *crosspt = -*crosspt ; // flag this for second child

      pa = p1 ;
      pb = p2 ;
      } // If first child

   else {                       // Second child
      pa = p2 ;                 // so parents reverse roles
      pb = p1 ;
      } // If second child

/*
   Prepare for reproduction.
   The masks are built only when *split is nonzero, and they are read only
   under that same condition below.  'right' has the low *split bits set
   and 'left' is its complement within the byte.
*/

   if (*split) {              // Create left and right splitting masks
      right = 1 ;
      i = *split ;
      while (--i)
         right = (right << 1) | 1 ;
      left = 255 ^ right ;
      }

   if (*crosspt > 0) {        // Use one point crossover
      n1 = chromsize / 2 ;    // This many genes in first half of child
      n2 = chromsize - n1 ;   // and this many in second half
      n3 = n4 = 0 ;           // We are using one point crossover
      i = *crosspt - 1 ;      // We will start building child here
      }
   else {                             // Use two point crossover
      n1 = n2 = n3 = chromsize / 4 ;  // This many in first three quarters
      n4 = chromsize - n1 - n2 - n3 ; // And the last quarter gets the rest
      i = -*crosspt - 1 ;             // 2 point method was flagged by neg
      }

/*
   Do reproduction here.
   Each segment optionally begins with one boundary byte that blends the two
   parents through the masks (this byte counts toward the segment length),
   followed by whole genes copied from a single parent.  Segments alternate
   between pb and pa.
*/

   if (*split) {              // Boundary byte: high bits from pa, low bits from pb
      i = (i+1) % chromsize ;
      child[i] = (left & pa[i])  |  (right & pb[i]) ;
      --n1 ;
      }

   while (n1--) {             // Rest of first segment comes from pb
      i = (i+1) % chromsize ;
      child[i] = pb[i] ;
      }

   if (*split) {              // Boundary byte: high bits from pb, low bits from pa
      i = (i+1) % chromsize ;
      child[i] = (left & pb[i])  |  (right & pa[i]) ;
      --n2 ;
      }

   while (n2--) {             // Rest of second segment comes from pa
      i = (i+1) % chromsize ;
      child[i] = pa[i] ;
      }

   if (n4) {               // Two point crossover?

      if (*split) {           // Third segment mirrors the first
         i = (i+1) % chromsize ;
         child[i] = (left & pa[i])  |  (right & pb[i]) ;
         --n3 ;
         }

      while (n3--) {
         i = (i+1) % chromsize ;
         child[i] = pb[i] ;
         }

      if (*split) {           // Fourth segment mirrors the second
         i = (i+1) % chromsize ;
         child[i] = (left & pb[i])  |  (right & pa[i]) ;
         --n4 ;
         }

      while (n4--) {
         i = (i+1) % chromsize ;
         child[i] = pa[i] ;
         }

      } // If two point crossover
}
Ejemplo n.º 14
0
/*
   TRANSFER - rank candidate predictors by transfer entropy with a
   'dependent' variable, with Monte-Carlo permutation test p-values.

   Command line:  TRANSFER  datafile  n_indep  depname  nbins  nreps

   The first replication uses the data as read.  Every later replication
   shuffles each candidate before its criterion is computed.  Results are
   written to TRANSFER.LOG in the current directory.
*/
int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )

{
   int i, j, k, nvars, ncases, irep, nreps, nbins, nbins_dep, nbins_indep, *count ;
   int n_indep_vars, idep, icand, *index, *mcpt_max_counts, *mcpt_same_counts, *mcpt_solo_counts ;
   short int *bins_dep, *bins_indep ;
   double *data, *work, dtemp, *save_info, criterion, *crits ;
   double *ab, *bc, *b ;
   char filename[256], **names, depname[256] ;
   FILE *fp ;

/*
   Process command line parameters
*/

#if 1
   if (argc != 6) {
      printf ( "\nUsage: TRANSFER  datafile  n_indep  depname  nbins  nreps" ) ;
      printf ( "\n  datafile - name of the text file containing the data" ) ;
      printf ( "\n             The first line is variable names" ) ;
      printf ( "\n             Subsequent lines are the data." ) ;
      printf ( "\n             Delimiters can be space, comma, or tab" ) ;
      printf ( "\n  n_indep - Number of independent vars, starting with the first" ) ;
      printf ( "\n  depname - Name of the 'dependent' variable" ) ;
      printf ( "\n            It must be AFTER the first n_indep variables" ) ;
      printf ( "\n  nbins - Number of bins for all variables" ) ;
      printf ( "\n  nreps - Number of Monte-Carlo permutations, including unpermuted" ) ;
      exit ( 1 ) ;
      }

   // The two string arguments are copied into fixed 256-byte buffers,
   // so refuse anything that would not fit rather than overrun them.
   if (strlen ( argv[1] ) >= sizeof(filename)  ||  strlen ( argv[3] ) >= sizeof(depname)) {
      printf ( "\nERROR... datafile and depname must each be shorter than %d characters",
               (int) sizeof(filename) ) ;
      exit ( 1 ) ;
      }

   strcpy ( filename , argv[1] ) ;
   n_indep_vars = atoi ( argv[2] ) ;
   strcpy ( depname , argv[3] ) ;
   nbins = atoi ( argv[4] ) ;
   nreps = atoi ( argv[5] ) ;
#else
   strcpy ( filename , "..\\SYNTH.TXT" ) ;
   n_indep_vars = 7 ;
   strcpy ( depname , "SUM1234" ) ;
   nbins = 2 ;
   nreps = 1 ;
#endif

   _strupr ( depname ) ;

/*
   These are used by MEM.CPP for runtime memory validation
*/

   _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ;
   fp = fopen ( mem_file_name , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MEM.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }
   fclose ( fp ) ;
   mem_keep_log = 1 ;  // 1 keeps a memory use log (slows execution!); set to 0 to disable
   mem_max_used = 0 ;

/*
   Open the text file to which results will be written
*/

   fp = fopen ( "TRANSFER.LOG" , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open TRANSFER.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }

/*
   Read the file and locate the index of the dependent variable
*/

   if (readfile ( filename , &nvars , &names , &ncases , &data )) {
      fclose ( fp ) ;
      return EXIT_FAILURE ;
      }

   for (idep=0 ; idep<nvars ; idep++) {
      if (! strcmp ( depname , names[idep] ))
         break ;
      }

   if (idep == nvars) {
      printf ( "\nERROR... Dependent variable %s is not in file", depname ) ;
      fclose ( fp ) ;
      return EXIT_FAILURE ;
      }

   if (idep < n_indep_vars) {
      printf ( "\nERROR... Dependent variable %s must be beyond independent vars",
               depname ) ;
      fclose ( fp ) ;
      return EXIT_FAILURE ;
      }

/*
   Allocate scratch memory

   crits - Transfer Entropy criterion
   index - Indices that sort the criterion
   save_info - Ditto, this is univariate criteria, to be sorted
*/

   MEMTEXT ( "TRANSFER work allocs" ) ;
   work = (double *) MALLOC ( ncases * sizeof(double) ) ;
   assert ( work != NULL ) ;
   crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( crits != NULL ) ;
   index = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( index != NULL ) ;
   bins_indep = (short int *) MALLOC ( ncases * sizeof(short int) ) ;
   assert ( bins_indep != NULL ) ;
   bins_dep = (short int *) MALLOC ( ncases * sizeof(short int) ) ;
   assert ( bins_dep != NULL ) ;
   mcpt_max_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_max_counts != NULL ) ;
   mcpt_same_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_same_counts != NULL ) ;
   mcpt_solo_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_solo_counts != NULL ) ;
   save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( save_info != NULL ) ;
   count = (int *) MALLOC ( nbins * nbins * nbins * sizeof(int) ) ;
   assert ( count != NULL ) ;
   ab = (double *) MALLOC ( nbins * nbins * sizeof(double) ) ;
   assert ( ab != NULL ) ;
   bc = (double *) MALLOC ( nbins * nbins * sizeof(double) ) ;
   assert ( bc != NULL ) ;
   b = (double *) MALLOC ( nbins * sizeof(double) ) ;
   assert ( b != NULL ) ;

/*
   Get the dependent variable and partition it
*/

   for (i=0 ; i<ncases ; i++)            // Get the 'dependent' variable
      work[i] = data[i*nvars+idep] ;

   nbins_dep = nbins ;
   partition ( ncases , work , &nbins_dep , NULL , bins_dep ) ;

/*
   Replication loop is here
*/

   for (irep=0 ; irep<nreps ; irep++) {

/*
   Compute and save the transfer entropy of the dependent variable
   with each individual independent variable candidate.
*/

      for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
         for (i=0 ; i<ncases ; i++)
            work[i] = data[i*nvars+icand] ;

         //    Shuffle independent variable if in permutation run (irep>0)

         if (irep) {                   // If doing permuted runs, shuffle
            i = ncases ;               // Number remaining to be shuffled
            while (i > 1) {            // While at least 2 left to shuffle
               j = (int) (unifrand () * i) ;
               if (j >= i)             // Insurance in case unifrand() returns 1
                  j = i - 1 ;
               dtemp = work[--i] ;
               work[i] = work[j] ;
               work[j] = dtemp ;
               }
            }

         nbins_indep = nbins ;
         partition ( ncases , work , &nbins_indep , NULL , bins_indep ) ;

         criterion = trans_ent ( ncases , nbins_indep , nbins_dep ,
                                 bins_indep , bins_dep ,
                                 0 , 1 , 1 , count , ab , bc , b ) ;

         save_info[icand] = criterion ; // We will sort this when all candidates are done

         if (irep == 0) {               // If doing original (unpermuted), save criterion
            index[icand] = icand ;      // Will need original indices when criteria are sorted
            crits[icand] = criterion ;
            mcpt_max_counts[icand] = mcpt_same_counts[icand] = mcpt_solo_counts[icand] = 1 ;  // This is >= itself so count it now
            }
         else {
            if (criterion >= crits[icand])
               ++mcpt_solo_counts[icand] ;
            }
         } // Initial list of all candidates

      if (irep == 0)  // Find the indices that sort the candidates per criterion
         qsortdsi ( 0 , n_indep_vars-1 , save_info , index ) ;

      else {
         qsortd ( 0 , n_indep_vars-1 , save_info ) ;
         for (icand=0 ; icand<n_indep_vars ; icand++) {
            if (save_info[icand] >= crits[index[icand]])
               ++mcpt_same_counts[index[icand]] ;
            if (save_info[n_indep_vars-1] >= crits[index[icand]]) // Valid only for largest
               ++mcpt_max_counts[index[icand]] ;
            }
         }

      }  // For all reps

   fprintf ( fp , "\nTransfer entropy of %s", depname);

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\nPredictors, in order of decreasing transfer entropy" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n                       Variable   Information   Solo pval   Min pval   Max pval" ) ;

   for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates
      k = index[n_indep_vars-1-icand] ;           // Index of sorted candidate
      fprintf ( fp , "\n%31s %11.5lf %12.4lf %10.4lf %10.4lf", names[k], crits[k],
                (double) mcpt_solo_counts[k] / nreps,
                (double) mcpt_same_counts[k] / nreps,
                (double) mcpt_max_counts[k] / nreps ) ;
      }

   MEMTEXT ( "TRANSFER: Finish" ) ;
   fclose ( fp ) ;
   FREE ( work ) ;
   FREE ( crits ) ;
   FREE ( index ) ;
   FREE ( bins_indep ) ;
   FREE ( bins_dep ) ;
   FREE ( mcpt_max_counts ) ;
   FREE ( mcpt_same_counts ) ;
   FREE ( mcpt_solo_counts ) ;
   FREE ( save_info ) ;
   FREE ( count ) ;
   FREE ( ab ) ;
   FREE ( bc ) ;
   FREE ( b ) ;
   free_data ( nvars , names , data ) ;

   MEMCLOSE () ;
   printf ( "\n\nPress any key..." ) ;
   _getch () ;
   return EXIT_SUCCESS ;
}
Ejemplo n.º 15
0
/*
   MI_ONLY - rank candidate predictors by adaptive-partitioning mutual
   information with a 'dependent' variable, with Monte-Carlo permutation
   test p-values.

   Command line:  MI_ONLY  datafile  n_indep  depname  nreps

   The first replication uses the data as read.  Every later replication
   shuffles the dependent variable before the criteria are computed.
   Results are written to MI_ONLY.LOG in the current directory.
*/
int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )

{
   int i, j, k, nvars, ncases, irep, nreps, ivar, nties, ties ;
   int n_indep_vars, idep, icand, *index, *mcpt_max_counts, *mcpt_same_counts, *mcpt_solo_counts ;
   double *data, *work, dtemp, *save_info, criterion, *crits ;
   char filename[256], **names, depname[256] ;
   FILE *fp ;
   MutualInformationAdaptive *mi_adapt ;

/*
   Process command line parameters
*/

#if 1
   if (argc != 5) {
      printf ( "\nUsage: MI_ONLY  datafile  n_indep  depname  nreps" ) ;
      printf ( "\n  datafile - name of the text file containing the data" ) ;
      printf ( "\n             The first line is variable names" ) ;
      printf ( "\n             Subsequent lines are the data." ) ;
      printf ( "\n             Delimiters can be space, comma, or tab" ) ;
      printf ( "\n  n_indep - Number of independent vars, starting with the first" ) ;
      printf ( "\n  depname - Name of the 'dependent' variable" ) ;
      printf ( "\n            It must be AFTER the first n_indep variables" ) ;
      printf ( "\n  nreps - Number of Monte-Carlo permutations, including unpermuted" ) ;
      exit ( 1 ) ;
      }

   // The two string arguments are copied into fixed 256-byte buffers,
   // so refuse anything that would not fit rather than overrun them.
   if (strlen ( argv[1] ) >= sizeof(filename)  ||  strlen ( argv[3] ) >= sizeof(depname)) {
      printf ( "\nERROR... datafile and depname must each be shorter than %d characters",
               (int) sizeof(filename) ) ;
      exit ( 1 ) ;
      }

   strcpy ( filename , argv[1] ) ;
   n_indep_vars = atoi ( argv[2] ) ;
   strcpy ( depname , argv[3] ) ;
   nreps = atoi ( argv[4] ) ;
#else
   strcpy ( filename , "..\\SYNTH.TXT" ) ;
   n_indep_vars = 7 ;
   strcpy ( depname , "SUM1234" ) ;
   nreps = 100 ;
#endif

   _strupr ( depname ) ;

/*
   These are used by MEM.CPP for runtime memory validation
*/

   _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ;
   fp = fopen ( mem_file_name , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MEM.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }
   fclose ( fp ) ;
   mem_keep_log = 0 ;  // Change this to 1 to keep a memory use log (slows execution!)
   mem_max_used = 0 ;

/*
   Open the text file to which results will be written
*/

   fp = fopen ( "MI_ONLY.LOG" , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MI_ONLY.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }

/*
   Read the file and locate the index of the dependent variable
*/

   if (readfile ( filename , &nvars , &names , &ncases , &data )) {
      fclose ( fp ) ;
      return EXIT_FAILURE ;
      }

   for (idep=0 ; idep<nvars ; idep++) {
      if (! strcmp ( depname , names[idep] ))
         break ;
      }

   if (idep == nvars) {
      printf ( "\nERROR... Dependent variable %s is not in file", depname ) ;
      fclose ( fp ) ;
      return EXIT_FAILURE ;
      }

   if (idep < n_indep_vars) {
      printf ( "\nERROR... Dependent variable %s must be beyond independent vars",
               depname ) ;
      fclose ( fp ) ;
      return EXIT_FAILURE ;
      }

/*
   Check each variable for ties.  This is not needed for the algorithm,
   but it is good to warn the user, because more than a very few tied values
   in any variable seriously degrades performance of the adaptive partitioning algorithm.
*/

   MEMTEXT ( "MI_ONLY: Work" ) ;
   work = (double *) MALLOC ( ncases * sizeof(double) ) ;
   assert ( work != NULL ) ;

   ties = 0 ;
   for (ivar=0 ; ivar<nvars ; ivar++) {
      // The independent variables are indices 0 through n_indep_vars-1, so
      // anything at or beyond n_indep_vars is skipped unless it is the
      // dependent variable.  (The test was previously '>', which also let
      // the unselected variable at index n_indep_vars through.)
      if (ivar >= n_indep_vars  &&  ivar != idep)
         continue ; // Check only the variables selected by the user
      for (i=0 ; i<ncases ; i++)
         work[i] = data[i*nvars+ivar] ;
      qsortd ( 0 , ncases-1 , work ) ;
      nties = 0 ;
      for (i=1 ; i<ncases ; i++) {      // After sorting, ties are adjacent
         if (work[i] == work[i-1])
            ++nties ;
         }
      if ((double) nties / (double) ncases > 0.05) {
         ++ties ;
         fprintf ( fp , "\nWARNING... %s has %.2lf percent ties!",
                   names[ivar], 100.0 * nties / (double) ncases ) ;
         }
      } // For all variables
   if (ties) {
      fprintf ( fp , "\nThe presence of ties will seriously degrade" ) ;
      fprintf ( fp , "\nperformance of the adaptive partitioning algorithm\n\n" ) ;
      }

/*
   Allocate scratch memory and create the MutualInformation object using the
   dependent variable

   crits - Mutual information criterion
   index - Indices that sort the criterion
   save_info - Ditto, this is univariate information, to be sorted
   mi_adapt - The MutualInformation object, constructed with the 'dependent' variable
*/

   MEMTEXT ( "MI_ONLY work allocs plus MutualInformation" ) ;
   crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( crits != NULL ) ;
   index = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( index != NULL ) ;
   mcpt_max_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_max_counts != NULL ) ;
   mcpt_same_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_same_counts != NULL ) ;
   mcpt_solo_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_solo_counts != NULL ) ;
   save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( save_info != NULL ) ;

   for (irep=0 ; irep<nreps ; irep++) {

      for (i=0 ; i<ncases ; i++)            // Get the 'dependent' variable
         work[i] = data[i*nvars+idep] ;

//    Shuffle dependent variable if in permutation run (irep>0)

      if (irep) {                   // If doing permuted runs, shuffle
         i = ncases ;              // Number remaining to be shuffled
         while (i > 1) {            // While at least 2 left to shuffle
            j = (int) (unifrand () * i) ;
            if (j >= i)             // Insurance in case unifrand() returns 1
               j = i - 1 ;
            dtemp = work[--i] ;
            work[i] = work[j] ;
            work[j] = dtemp ;
            }
         }

      // Here we use a tiny split threshold (instead of the usual 6.0) so that it picks up
      // small amounts of mutual information (perhaps including noise).
      // If we used 6.0, nearly all permutations of any reasonably sized dataset
      // would have a computed mutual information of zero.  It's safe picking up
      // some noise because the permutation test will account for this.

      mi_adapt = new MutualInformationAdaptive ( ncases , work , 1 , 0.1 ) ; // Deliberately tiny for low information
      assert ( mi_adapt != NULL ) ;

/*
   Compute and save the mutual information for the dependent variable
   with each individual independent variable candidate.
*/

      for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
         for (i=0 ; i<ncases ; i++)
            work[i] = data[i*nvars+icand] ;

         criterion = mi_adapt->mut_inf ( work , 1 ) ;

         save_info[icand] = criterion ; // We will sort this when all candidates are done

         if (irep == 0) {               // If doing original (unpermuted), save criterion
            index[icand] = icand ;      // Will need original indices when criteria are sorted
            crits[icand] = criterion ;
            mcpt_max_counts[icand] = mcpt_same_counts[icand] = mcpt_solo_counts[icand] = 1 ;  // This is >= itself so count it now
            }
         else {
            if (criterion >= crits[icand])
               ++mcpt_solo_counts[icand] ;
            }
         } // Initial list of all candidates

      delete mi_adapt ;
      mi_adapt = NULL ;

      if (irep == 0)  // Find the indices that sort the candidates per criterion
         qsortdsi ( 0 , n_indep_vars-1 , save_info , index ) ;

      else {
         qsortd ( 0 , n_indep_vars-1 , save_info ) ;
         for (icand=0 ; icand<n_indep_vars ; icand++) {
            if (save_info[icand] >= crits[index[icand]])
               ++mcpt_same_counts[index[icand]] ;
            if (save_info[n_indep_vars-1] >= crits[index[icand]]) // Valid only for largest
               ++mcpt_max_counts[index[icand]] ;
            }
         }

      }  // For all reps

   fprintf ( fp , "\nAdaptive partitioning mutual information of %s", depname);

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\nPredictors, in order of decreasing mutual information" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n                       Variable   Information   Solo pval   Min pval   Max pval" ) ;

   for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates
      k = index[n_indep_vars-1-icand] ;           // Index of sorted candidate
      fprintf ( fp , "\n%31s %11.5lf %12.4lf %10.4lf %10.4lf", names[k], crits[k],
                (double) mcpt_solo_counts[k] / nreps,
                (double) mcpt_same_counts[k] / nreps,
                (double) mcpt_max_counts[k] / nreps ) ;
      }

   MEMTEXT ( "MI_ONLY: Finish" ) ;
   fclose ( fp ) ;
   FREE ( work ) ;
   FREE ( crits ) ;
   FREE ( index ) ;
   FREE ( mcpt_max_counts ) ;
   FREE ( mcpt_same_counts ) ;
   FREE ( mcpt_solo_counts ) ;
   FREE ( save_info ) ;
   free_data ( nvars , names , data ) ;

   MEMCLOSE () ;
   printf ( "\n\nPress any key..." ) ;
   _getch () ;
   return EXIT_SUCCESS ;
}
Ejemplo n.º 16
0
static double genbeta(double aa)
/*
**********************************************************************
     static double genbeta(double aa)
               GENerate BETa random deviate (equal shape parameters)
                              Function
     Returns a single random deviate from the beta distribution with
     parameters A and B, where this routine always uses B = A = aa.
     The density of the beta is
               x^(a-1) * (1-x)^(b-1) / B(a,b) for 0 < x < 1
                              Arguments
     aa --> Shape parameter.  The second parameter bb is set equal
            to aa inside the routine.  No range check is made on aa.
                              Method
     R. C. H. Cheng
     Generating Beta Variates with Nonintegral Shape Parameters
     Communications of the ACM, 21:317-322  (1978)
     (Algorithms BB and BC)
                              Notes
     The setup constants live in static storage and are recomputed
     only when aa differs from the value seen on the previous call,
     so the routine is not reentrant.  Uniform deviates come from
     unifrand(); two are drawn per rejection-loop iteration.
**********************************************************************
*/
{
    /* Ceiling used for w instead of a*exp(v) once v exceeds exp_limit */
    const double exp_limit = 89.0;
    const double w_ceiling = 1.0E38;

    /* State remembered between calls */
    static double prev_aa = -1.0;
    static double prev_bb = -1.0;
    static double a, b, alpha, beta, gam, k1, k2;

    double bb = aa;
    double result, r, s, t, u1, u2, v, w, y, z;
    int cached;

    cached = (prev_aa == aa && prev_bb == bb);
    if (!cached) {
        prev_aa = aa;
        prev_bb = bb;
    }

    if (MIN(aa,bb) > 1.0) {
        /*
             Algorithm BB
             Initialize (skipped when the parameters are unchanged)
        */
        if (!cached) {
            a = MIN(aa,bb);
            b = MAX(aa,bb);
            alpha = a+b;
            beta = sqrt((alpha-2.0)/(2.0*a*b-alpha));
            gam = a+1.0/beta;
        }

        for (;;) {
            /* Step 1 */
            u1 = unifrand();
            u2 = unifrand();
            v = beta*log(u1/(1.0-u1));
            w = (v > exp_limit) ? w_ceiling : a*exp(v);
            z = pow(u1,2.0)*u2;
            r = gam*v-1.3862944;            /* 1.3862944 ~ ln 4 */
            s = a+r-w;

            /* Step 2: quick acceptance (2.609438 ~ 1 + ln 5) */
            if (s+2.609438 >= 5.0*z)
                break;

            /* Step 3 */
            t = log(z);
            if (s > t)
                break;

            /* Step 4: full test; draw again only when it fails */
            if (!(r+alpha*log(alpha/(b+w)) < t))
                break;
        }

        /* Step 5 */
        result = (aa == a) ? w/(b+w) : b/(b+w);
        return result;
    }

    /*
         Algorithm BC
         Initialize (skipped when the parameters are unchanged)
    */
    if (!cached) {
        double delta;

        a = MAX(aa,bb);
        b = MIN(aa,bb);
        alpha = a+b;
        beta = 1.0/b;
        delta = 1.0+a-b;
        k1 = delta*(1.38889E-2+4.16667E-2*b)/(a*beta-0.777778);
        k2 = 0.25+(0.5+0.25/delta)*b;
    }

    for (;;) {
        /* Step 1 */
        u1 = unifrand();
        u2 = unifrand();

        if (u1 >= 0.5) {
            /* Step 3 */
            z = pow(u1,2.0)*u2;
            if (z <= 0.25) {
                /* Accepted without the logarithmic test of Step 5 */
                v = beta*log(u1/(1.0-u1));
                w = (v > exp_limit) ? w_ceiling : a*exp(v);
                break;
            }
            if (z >= k2)
                continue;
        }
        else {
            /* Step 2 */
            y = u1*u2;
            z = u1*y;
            if (0.25*u2+z-y >= k1)
                continue;
        }

        /*
             Step 4
             Step 5
        */
        v = beta*log(u1/(1.0-u1));
        w = (v > exp_limit) ? w_ceiling : a*exp(v);
        if (alpha*(log(alpha/(b+w))+v)-1.3862944 < log(z))
            continue;
        break;
    }

    /* Step 6 */
    result = (a == aa) ? w/(b+w) : b/(b+w);
    return result;
}
Ejemplo n.º 17
0
/*
   ConfConf - Monte-Carlo check of the confidence attached to quantile
   thresholds and Kolmogorov-Smirnov bounds.

   Command line:  ConfConf  ncases  pval  conf  nreps

   The program computes pessimistic quantile bounds (quantile_conf) and
   KS thresholds (inverse_ks), prints an explanation of them, then draws
   nreps samples of ncases uniform deviates.  Each sample is sorted and
   the program counts how often the chosen order statistics fall on the
   wrong side of the pessimistic bounds and how often the KS statistics
   D, D+ and D- returned by ks_test exceed their thresholds.  The observed
   failure fractions are printed next to the requested conf.

   Returns 0 on completion; calls exit(1) on a bad command line or if
   the work array cannot be allocated.
*/
int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )
{
   int i, ncases, irep, nreps, m, n_lower, n_upper, n_ks2, n_ks_null, n_ks_alt ;
   double *x, pval, conf, pessimistic_lower, pessimistic_upper ;
   double ks_two, ks_one, D, Dp, Dm ;

   if (argc != 5) {
      printf ( "\nUsage: ConfConf  ncases  pval  conf  nreps" ) ;
      printf ( "\n  ncases - Number of cases in the sample" ) ;
      printf ( "\n  pval - Probability value (<0.5) for quantile test" ) ;
      printf ( "\n  conf - Desired confidence value (<0.5) for both tests" ) ;
      printf ( "\n  nreps - Number of replications" ) ;
      exit ( 1 ) ;
      }

   ncases = atoi ( argv[1] ) ;
   pval = atof ( argv[2] ) ;
   conf = atof ( argv[3] ) ;
   nreps = atoi ( argv[4] ) ;

   if (ncases < 10) {
      printf ( "\nERROR.. Must have at least 10 cases" ) ;
      exit ( 1 ) ;
      }

   // The lower limit guarantees m >= 1 below, so x[m-1] is a valid index
   if (pval * ncases < 1.0  ||  pval >= 0.5) {
      printf ( "\nERROR.. Pval too small or too large" ) ;
      exit ( 1 ) ;
      }

   if (conf <= 0.0  ||  conf >= 0.5) {
      printf ( "\nERROR.. Conf must be greater than 0 and less than 0.5" ) ;
      exit ( 1 ) ;
      }

   if (nreps < 1) {
      printf ( "\nERROR.. Must have at least 1 replication" ) ;
      exit ( 1 ) ;
      }


/*
   Allocate memory and initialize
*/

   x = (double *) malloc ( ncases * sizeof(double) ) ;
   if (x == NULL) {   // Without this check the sampling loop would write through NULL
      printf ( "\nERROR.. Insufficient memory" ) ;
      exit ( 1 ) ;
      }

   m = (int) (pval * ncases) ;  // Conservative order statistic for bound
   pessimistic_lower = quantile_conf ( ncases , m , conf ) ;
   pessimistic_upper = 1.0 - pessimistic_lower ;
   ks_two = inverse_ks ( ncases , 1.0 - conf ) ;       // Two-tailed test
   ks_one = inverse_ks ( ncases , 1.0 - 2.0 * conf ) ; // One-tailed test

   printf ( "\nSuppose the model predicts values near 0 for the null hypothesis" ) ;
   printf ( "\nand values near 1 for the alternative hypothesis." ) ;

   printf ( "\n\nIf the dataset represents the null hypothesis, the threshold" ) ;
   printf ( "\nfor rejecting the null at p=%.4lf is given by the %d'th order statistic.",
            pval, ncases - m + 1 ) ;
   printf ( "\nThis is a conservative estimate of the %.4lf quantile", 1.0-pval ) ;
   printf ( "\nThere is only a %.4lf chance that it will really be the %.4lf quantile or worse.",
            conf, pessimistic_upper ) ;

   printf ( "\n\nIf the dataset represents the alternative hypothesis, the threshold" ) ;
   printf ( "\nfor rejecting the alt at p=%.4lf is given by the %d'th order statistic.",
            pval, m ) ;
   printf ( "\nThis is a conservative estimate of the %.4lf quantile", pval ) ;
   printf ( "\nThere is only a %.4lf chance that it will really be the %.4lf quantile or worse.",
            conf, pessimistic_lower) ;

   printf ( "\n\nKS thresholds: two-tailed KS = %.4lf  one-tailed KS = %.4lf",
            ks_two, ks_one ) ;

/*
   Now generate nreps samples.  Verify that our required confidence level
   is observed.  Note that the fact that this test uses a uniform distribution
   does not in any way limit its applicability to uniform distributions.
   If one were to generate cases from any other reasonable distribution,
   the pessimistic quantile bounds would have to be transformed similarly.
   The result is that the inequalities below would pass or fail identically.
   We count the number of times 'disaster' happens.
   Disaster is when the order statistic used for the threshold is toward the
   inside (center) of the distribution, meaning that if this order statistic
   had been used as a threshold, more of the distribution would be outside
   the threshold than the user expected.  We expect disaster to happen with
   probability equal to the specified conf parameter.

   For the two-tailed Kolmogorov-Smirnov test, disaster is when the empirical
   CDF deviates (above or below) from the correct value by more than the
   conf-inspired value.  For the one-tailed test in which the dataset is from
   the NULL distribution, disaster is when the empirical CDF exceeds the true
   CDF, a situation that would encourage false rejection of the null hypothesis.
   This is measured by D+.  For the one-tailed test in which the dataset is from
   the ALT distribution, disaster is when the empirical CDF is less than the
   true CDF, a situation that would encourage false rejection of the alternative
   hypothesis.  This is measured by D-.
*/

   n_lower = n_upper = n_ks2 = n_ks_null = n_ks_alt = 0 ;

   for (irep=0 ; irep<nreps ; irep++) {

      for (i=0 ; i<ncases ; i++)
         x[i] = unifrand () ;
      qsortd ( 0 , ncases-1 , x ) ;   // Order statistics are read from x below

      if (x[m-1] > pessimistic_lower)        // m'th order statistic (1-based)
         ++n_lower ;

      if (x[ncases-m] < pessimistic_upper)   // (ncases-m+1)'th order statistic
         ++n_upper ;

      D = ks_test ( ncases , x , &Dp , &Dm ) ;
      if (D > ks_two)
         ++n_ks2 ;
      if (Dp > ks_one)
         ++n_ks_null ;
      if (Dm > ks_one)
         ++n_ks_alt ;
      }

   printf ( "\nPoint failure (expected=%.4lf)  Lower=%.4lf  Upper=%.4lf",
            conf, (double) n_lower / nreps, (double) n_upper / nreps) ;
   printf ( "\nKS failure:  two-tailed = %.4lf  NULL = %.4lf  ALT = %.4lf",
            (double) n_ks2 / nreps, (double) n_ks_null / nreps,
            (double) n_ks_alt / nreps) ;

   free ( x ) ;
   return ( 0 ) ;
}
/*
   TEST_DIS - Monte-Carlo study of discrete mutual information and the
   error-rate bounds derived from it.

   Command line:  TEST_DIS nsamples ntries type parameter ptie

   For each of up to 10 bin counts (3, 4, 8, 16, ...) the program knows the
   joint distribution of the X and Y bins that it generates, so it first
   computes the population mutual information (correctMI).  It then repeats
   ntries times: generate nsamps cases of the selected type, bin them,
   compute the sample mutual information and entropy through a
   MutualInformationDiscrete object, derive loose and tight lower/upper
   bounds, and accumulate bias, standard error, mean bounds and the number of
   times the reference error rate falls outside the bounds.  A progress table
   is printed about every 1% of the tries and at the end.  ESC stops early.

   Returns EXIT_SUCCESS; calls exit(1) on a bad command line.
*/
int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )

{
   int i, j, k, nsamps, ntries, itype, divisor, itry, npart ;
   int isplit, nsplits, splits[10], nmiss ;
   short int *xbins, *ybins ;
   double param, ptie, *x, *y, x1, x2, result, prior_x1, p, sum, marg1, marg2 ;
   double ent, denom, cond, low0, low1, high0, high1, missfrac ;
   double right, wrong0, wrong1, cut0, cut1, cut2, cut3, cut4 ;
   double correctMI[10], total[10], bias[10], std_err[10] ;
   double lower0[10], upper0[10], lower1[10], upper1[10], miss[10] ;
   double outside0[10], outside1[10] ;
   MutualInformationDiscrete *mi ;

/*
   Process command line parameters
*/

#if 1
   if (argc != 6) {
      printf ( "\nUsage: TEST_DIS nsamples ntries type parameter ptie" ) ;
      printf ( "\n  nsamples - Number of cases in the dataset" ) ;
      printf ( "\n  ntried - Number of Monte-Carlo replications" ) ;
      printf ( "\n  type - Type of test" ) ;
      printf ( "\n    0=bivariate normal with specified correlation" ) ;
      printf ( "\n    1=discrete bins with uniform error distribution" ) ;
      printf ( "\n    2=discrete bins with triangular error distribution" ) ;
      printf ( "\n    3=discrete bins with cyclic error distribution" ) ;
      printf ( "\n    4=discrete bins with attractive class error distribution" ) ;
      printf ( "\n  parameter - Depends on type of test" ) ;
      printf ( "\n    0 - Correlation" ) ;
      printf ( "\n    >0 - error probability" ) ;
      printf ( "\n  ptie - If typ=0, probability of a tied case, else ignored" ) ;
      exit ( 1 ) ;
      }

   nsamps = atoi ( argv[1] ) ;
   ntries = atoi ( argv[2] ) ;
   itype = atoi ( argv[3] ) ;
   param = atof ( argv[4] ) ;
   ptie = atof ( argv[5] ) ;
#else
   nsamps = 1000 ;
   ntries = 10000 ;
   itype = 2 ;
   param = 0.2 ;
   ptie = 0.0 ;
#endif

   if ((nsamps <= 0)  ||  (ntries <= 0)  ||  (param < 0.0)  ||  (param > 1.0)
    ||  (itype < 0)  ||  (itype > 4)  || (ptie < 0.0)  || (ptie > 1.0)) {
      printf ( "\nUsage: TEST_DIS nsamples ntries type parameter ptie" ) ;
      exit ( 1 ) ;
      }

   if (itype > 0) {
      if (param > 0.5) {
         printf ( "\nNOTE... Reducing P(error) from %.4lf to 0.5", param ) ;
         printf ( "\nPress any key..." ) ;
         param = 0.5 ;
         _getch () ;
         }
      if (param == 0.0)          // Prevent numerical problems
         param = 1.e-14 ;
      if (param == 1.0)          // Cannot trigger after the clamp to 0.5 above
         param = 1.0 - 1.e-14 ;
      }


/*
   Allocate memory and initialize
*/

   divisor = ntries / 100 ;  // This is for progress reports only
   if (divisor < 1)
      divisor = 1 ;

   x = (double *) malloc ( nsamps * sizeof(double) ) ;
   assert ( x != NULL ) ;
   y = (double *) malloc ( nsamps * sizeof(double) ) ;
   assert ( y != NULL ) ;
   xbins = (short int *) malloc ( nsamps * sizeof(short int) ) ;
   assert ( xbins != NULL ) ;
   ybins = (short int *) malloc ( nsamps * sizeof(short int) ) ;
   assert ( ybins != NULL ) ;

/*
   Compute the different numbers of splits
   We increase them by doubling from 2, except that two bins causes various
   problems with the bound algorithms.  So we increase the first to 3 bins.
   Doubling stops once a bin would average fewer than 5 cases.
*/

   splits[0] = 2 ;
   for (nsplits=1 ; nsplits<10 ; nsplits++) {
      if (nsamps / splits[nsplits-1] < 5)
         break ;
      splits[nsplits] = splits[nsplits-1] * 2 ;
      }
   splits[0] = 3 ;

/*
--------------------------------------------------------------------------------

   Compute the correct mutual information according to the type

   In every discrete case the sum is over the cells of the joint (confusion)
   matrix:  sum of p(x,y) * log ( p(x,y) / (p(x) * p(y)) )

--------------------------------------------------------------------------------
*/


/*
   Bivariate normal
*/

   if (itype == 0) {
      for (i=0 ; i<10 ; i++)
         correctMI[i] = -0.5 * log ( 1.0 - param * param ) ;
      }

/*
   Errors are uniformly distributed to all possible error bins
*/

   else if (itype == 1) { // Uniform error distribution
      for (i=0 ; i<nsplits ; i++) {
         j = splits[i] ;  // Number of bins
         p = 1.0 - param ;// Probability of a correct decision
         p /= j ;         // Probability of a given bin being chosen and correct
                          // This is the diagonal of the confusion matrix
         sum = j * p * log(p*j*j) ; // Diagonal
         p = param ;       // Probability of error (off diagonal)
         p /= j * (j-1) ;  // Probability of a given bin being chosen and wrong
                           // This is the off-diagonal elements
         sum += j * (j-1) * p * log(p*j*j) ;
         correctMI[i] = sum ;
         }
      }

/*
   90% of errors go in the upper triangle, 10% in the lower triangle
*/

   else if (itype == 2) { // Triangular error distribution
      for (isplit=0 ; isplit<nsplits ; isplit++) {
         npart = splits[isplit] ; // Number of bins
         right = (1.0 - param) / npart ;
         wrong0 = 0.1 * param / (npart * (npart-1) / 2) ;
         wrong1 = 0.9 * param / (npart * (npart-1) / 2) ;
         sum = 0.0 ;
         for (i=0 ; i<npart ; i++) {
            // Row i marginal: i lower-triangle cells and npart-1-i upper-triangle cells
            marg1 = right + i * wrong0 + (npart - 1 - i) * wrong1 ;
            // Column 0 marginal.  It must not depend on the row index i: column j
            // holds npart-1-j lower-triangle cells and j upper-triangle cells,
            // which is also how the generator below builds cut0.
            marg2 = right + (npart - 1) * wrong0 ;
            for (j=0 ; j<npart ; j++) {
               if (j < i)
                  sum += wrong0 * log(wrong0/(marg1*marg2)) ;
               else if (j == i)
                  sum += right * log(right/(marg1*marg2)) ;
               else
                  sum += wrong1 * log(wrong1/(marg1*marg2)) ;
               marg2 += wrong1 - wrong0 ;  // Advance to the marginal of column j+1
               }
            }
         correctMI[isplit] = sum ;
         }
      }

/*
   Half of the errors go one bin to the right of the correct bin, and the
   other half go two bins to the right (with wraparound)
*/

   else if (itype == 3) {  // itype=3; Cyclic error distribution
      for (i=0 ; i<nsplits ; i++) {
         j = splits[i] ;  // Number of bins
         p = 1.0 - param ;// Probability of a correct decision
         p /= j ;         // Probability of a given bin being chosen and correct
                          // This is the diagonal of the confusion matrix
         sum = j * p * log(p*j*j) ; // Diagonal
         p = param ;  // Probability of error (off diagonal)
         p /= 2 * j ; // Probability of this adjacent bin being chosen and wrong
         sum += 2 * j * p * log(p*j*j) ;
         correctMI[i] = sum ;
         }
      }

/*
   This is a really complicated test of a couple classes being unnaturally
   attractive.  For the first nbins-2 true classes, most of the errors go to
   the last (rightmost) class, and the rest of the errors go to the second-last
   class.  All other members of the row are zero.
   For the second-last true class, most of the errors go to the last class,
   and the few remaining errors are evenly distributed across the remaining
   classes.  For the last true class, all errors (and it just has a few) are
   evenly distributed across the other classes.
   This tests what happens when most of the errors land in a single class,
   and most of the remaining errors land in a different single class.
*/

   else if (itype == 4) {  // itype=4; Attractive class error distribution
      for (isplit=0 ; isplit<nsplits ; isplit++) {
         npart = splits[isplit] ; // Number of bins
         right = (1.0 - param) / npart ;
         wrong0 = 0.05 * param / ((npart-1) + 2 * (npart-2)) ;
         wrong1 = 0.95 * param / (npart-1) ;
         sum = 0.0 ;
         for (i=0 ; i<npart ; i++) {
            if (i < npart-2) {
               marg1 = right + wrong0 + wrong1 ;
               marg2 = right + 2 * wrong0 ;
               sum += right * log(right/(marg1*marg2)) ;
               marg2 = right + (npart-1) * wrong0 ;
               sum += wrong0 * log(wrong0/(marg1*marg2)) ;
               marg2 = right + (npart-1) * wrong1 ;
               sum += wrong1 * log(wrong1/(marg1*marg2)) ;
               }
            else if (i == npart-2) {
               marg1 = right + (npart-2) * wrong0 + wrong1 ;
               marg2 = right + 2.0 * wrong0 ;
               sum += (npart-2) * wrong0 * log(wrong0/(marg1*marg2)) ;
               marg2 = right + (npart-1) * wrong0 ;
               sum += right * log(right/(marg1*marg2)) ;
               marg2 = right + (npart-1) * wrong1 ;
               sum += wrong1 * log(wrong1/(marg1*marg2)) ;
               }
            else {
               // The last row has npart-1 off-diagonal cells, each wrong0
               // (this is the generator's cut4 + right), so that is its marginal
               marg1 = right + (npart-1) * wrong0 ;
               marg2 = right + 2.0 * wrong0 ;
               sum += (npart-2) * wrong0 * log(wrong0/(marg1*marg2)) ;
               marg2 = right + (npart-1) * wrong0 ;
               sum += wrong0 * log(wrong0/(marg1*marg2)) ;
               marg2 = right + (npart-1) * wrong1 ;
               sum += right * log(right/(marg1*marg2)) ;
               }
            }
         correctMI[isplit] = sum ;
         }
      }

/*
   Main outer loop does all tries
*/

   for (i=0 ; i<nsplits ; i++)
      total[i] = bias[i] = std_err[i] = lower0[i] = upper0[i] =
                 lower1[i] = upper1[i] = miss[i] = outside0[i] = outside1[i] = 0.0 ;

   for (itry=1 ; itry<=ntries ; itry++) {

      if (((itry-1) % divisor) == 0)
         printf ( "\n\n\nTry %d of %d", itry, ntries ) ;

      if (itype == 0) {  // If bivariate normal, generate the data
         prior_x1 = 0.5 ;             // Arbitrary
         for (i=0 ; i<nsamps ; i++) { // Create bivariate sample with known correlation
            if (unifrand() < ptie)    // Duplicate the prior observation for a tie?
               x1 = prior_x1 ;
            else {
               x1 = normal () ;
               prior_x1 = x1 ;
               }
            x2 = normal () ;
            if (i < nsamps/2) {       // Equally split ties between X and Y
               x[i] = x1 ;
               y[i] = param * x1 + sqrt ( 1.0 - param * param ) * x2 ;
               }
            else {
               y[i] = x1 ;
               x[i] = param * x1 + sqrt ( 1.0 - param * param ) * x2 ;
               }
            }
         }

      for (isplit=0 ; isplit<nsplits ; isplit++) {

         if (itype == 0) {       // Bivariate normal
            npart = splits[isplit] ;  // Reset before each call; partition() takes it by address
            partition ( nsamps , x , &npart , NULL , xbins ) ;
            npart = splits[isplit] ;
            partition ( nsamps , y , &npart , NULL , ybins ) ;
            }

         else if (itype == 1) {  // Uniform error distribution
            for (i=0 ; i<nsamps ; i++)
               x[i] = unifrand () ;
            npart = splits[isplit] ;
            partition ( nsamps , x , &npart , NULL , xbins ) ;
            for (j=0 ; j<nsamps ; j++) {
               if (unifrand() < param) {
                  for (;;) {  // This is an error
                     ybins[j] = (short int) (0.999999999999 * unifrand() * npart) ;
                     if (xbins[j] != ybins[j]) // Must not accidentally be right!
                        break ;
                     }
                  }
               else   // This is correct
                  ybins[j] = xbins[j] ;
               }
            }

         else if (itype == 2) {  // Triangular error distribution
            npart = splits[isplit] ;
            right = (1.0 - param) / npart ;
            wrong0 = 0.1 * param / (npart * (npart-1) / 2) ; // Lower triangle
            wrong1 = 0.9 * param / (npart * (npart-1) / 2) ; // Upper triangle
            for (j=0 ; j<nsamps ; j++) {
               cut0 = right + (npart-1) * wrong0 ;  // Cumulative marginal of Y through bin 0
               p = unifrand () ;
               for (k=0 ; k<npart ; k++) {
                  if (p < cut0  ||  k == npart-1) {
                     ybins[j] = k ;
                     // Now draw X given Y=k
                     cut1 = k * wrong1 ;            // Mass of the k bins below k
                     cut2 = cut1 + right ;          // ...plus the diagonal cell
                     cut3 = (npart-k-1) * wrong0 ;  // Mass of the npart-k-1 bins above k
                     p = unifrand () * (cut2 + cut3) ;
                     if (p < cut1) {
                        i = (int) (p / cut1 * k) ;
                        xbins[j] = i ;
                        }
                     else if (p < cut2)
                        xbins[j] = k ;
                     else {
                        // Spread evenly over bins k+1..npart-1 only; the diagonal
                        // bin k already received its share in the branch above
                        i = k + 1 + (int) (((p - cut2) / cut3) * (npart-k-1)) ;
                        xbins[j] = i ;
                        }
                     break ;
                     }
                  cut0 += right + (k+1) * wrong1 + (npart-k-2) * wrong0 ; // Add marginal of bin k+1
                  }
               }
            }

         else if (itype == 3) {    // itype == 3 (Cyclic error distribution)
            for (i=0 ; i<nsamps ; i++)
               x[i] = unifrand () ;
            npart = splits[isplit] ;
            partition ( nsamps , x , &npart , NULL , xbins ) ;
            for (j=0 ; j<nsamps ; j++) {
               if (unifrand() < param) {
                  if (unifrand() < 0.5)
                     ybins[j] = (short int) ((xbins[j]+1) % splits[isplit]) ;
                  else
                     ybins[j] = (short int) ((xbins[j]+2) % splits[isplit]) ;
                  }
               else
                  ybins[j] = xbins[j] ;
               }
            }

         else if (itype == 4) {    // itype == 4 (attractive class error distribution)
            npart = splits[isplit] ;
            right = (1.0 - param) / npart ;
            wrong0 = 0.05 * param / ((npart-1) + 2 * (npart-2)) ;
            wrong1 = 0.95 * param / (npart-1) ;
            cut0 = (npart-2) * (right + wrong0 + wrong1) ;       // Mass of the first npart-2 rows
            cut1 = cut0 + (npart-2) * wrong0 + right + wrong1 ;  // ...plus the second-last row
            cut2 = (npart-2) * wrong0 ;
            cut3 = cut2 + right ;
            cut4 = (npart-1) * wrong0 ;
            for (j=0 ; j<nsamps ; j++) {
               p = unifrand () ;
               if (p < cut0) {
                  k = (int) (p / cut0 * (npart-2)) ;
                  xbins[j] = k ;
                  p = unifrand () * (right + wrong0 + wrong1) ;
                  if (p < right)
                     ybins[j] = k ;
                  else if (p < right + wrong0)
                     ybins[j] = npart-2 ;
                  else
                     ybins[j] = npart-1 ;
                  }
               else if (p < cut1) {
                  xbins[j] = npart-2 ;
                  p = unifrand () * (cut3 + wrong1) ;
                  if (p < cut2) {
                     k = (int) (p / cut2 * (npart-2)) ;
                     ybins[j] = k ;
                     }
                  else if (p < cut3)
                     ybins[j] = npart-2 ;
                  else
                     ybins[j] = npart-1 ;
                  }
               else {
                  xbins[j] = npart-1 ;
                  p = unifrand () * (cut4 + right) ;
                  if (p < cut4) {
                     k = (int) (p / cut4 * (npart-1)) ;
                     ybins[j] = k ;
                     }
                  else
                     ybins[j] = npart-1 ;
                  }
               }
            }

/*
   Create the MutualInformation object.
   Count errors.  This is used only for type 0 (bivariate normal)
*/

         mi = new MutualInformationDiscrete ( nsamps , ybins ) ;
         assert ( mi != NULL ) ;

         nmiss = 0 ;
         for (j=0 ; j<nsamps ; j++) {
            if (xbins[j] != ybins[j])
               ++nmiss ;
            }

         missfrac = (double) nmiss / (double) nsamps ;
         miss[isplit] += missfrac ;

/*
   Compute the mutual information, Y entropy, and conditional entropy
   Tally the mean mutual information and bias and standard error
*/

         result = mi->mut_inf ( xbins ) ; // Mutual information
         ent = mi->entropy () ;           // Y entropy
         cond = ent - result ;            // Conditional entropy H(Y|X)

//         printf ( "\n\nent=%.5lf  cond=%.5lf  MI=%.5lf  hPe=%.5lf  cond_err=%.5lf", /*!!!!!!*/
//                  ent, cond, result, mi->hPe(xbins), mi->conditional_error(xbins)) ; /*!!!!!!*/
//         printf ( "\nnumer0=%.5lf  numer1=%.5lf  lo den=%.5lf  hi den=%.5lf", /*!!!!!!*/
//                  cond - log(2.0), cond - mi->conditional_error ( xbins ),
//                  log(splits[isplit]-1.0), mi->HYe ( xbins )) ; /*!!!!!!*/

         total[isplit] += result ;
         bias[isplit] += result - correctMI[isplit] ;
         std_err[isplit] += (result - correctMI[isplit]) * (result - correctMI[isplit]) ;

/*
   Compute loose and tight lower and upper bounds
*/

         low0 = (cond - log(2.0)) / log ( splits[isplit] - 1.0 ) ;
         low1 = (cond - mi->conditional_error ( xbins )) / log ( splits[isplit] - 1.0 ) ;
         denom = mi->HYe ( xbins ) + 1.e-30 ;   // Tiny offset avoids division by zero
         high0 = cond / denom ;
         high1 = (cond - mi->conditional_error ( xbins )) / denom ;


/*
   Don't allow nonsense lower bound
   Don't go beyond what a naive classifier based on equal priors could do
*/

         if (low0 < 0.0)
            low0 = 0.0 ;

         if (high0 > 1.0 - 1.0 / splits[isplit])
            high0 = 1.0 - 1.0 / splits[isplit] ;

         if (high1 > 1.0 - 1.0 / splits[isplit])
            high1 = 1.0 - 1.0 / splits[isplit] ;

/*
   In rare pathological cases, the limit we just did to the high bound may
   pull it under the low bound.  Prevent this from happening.
*/

         if (low0 > high0)
            low0 = high0 ;

         if (low1 > high1)
            low1 = high1 ;

/*
   Cumulate the mean of the bounds.
*/

         lower0[isplit] += low0 ;
         lower1[isplit] += low1 ;
         upper0[isplit] += high0 ;
         upper1[isplit] += high1 ;

/*
   Count how many times the true population value is outside the computed
   bounds.  Note that I am not aware of any way of computing the expected
   error rate for a bivariate normal, so for the sake of doing something,
   I use the obtained error rate.  Ultimately this will be very good,
   because it will be a reasonably good, asymptotically unbiased Monte-Carlo
   estimator.  Unfortunately, it will take a while to get there, and meanwhile
   outages may be counted.  Expect the outage count to be somewhat inaccurate.
   A better program would not start counting outages until after many replications,
   thus allowing the Monte-Carlo error rate to decently converge before being used.
*/

         if (itype == 0) { // I'm not aware of any simple way to compute true error rate
            if (missfrac < low0  ||  missfrac > high0)
               ++outside0[isplit] ;
            if (missfrac < low1  ||  missfrac > high1)
               ++outside1[isplit] ;
            }

         else {  // We know the true error rate, because it was specified
            if (param < low0  ||  param > high0)
               ++outside0[isplit] ;
            if (param < low1  ||  param > high1)
               ++outside1[isplit] ;
            }

         delete mi ;
         } // For all splits

/*
   Print intermediate results to keep the user happy
*/

      if ((((itry-1) % divisor) == 0) || (itry == ntries) ) { // Don't do this every try!  Too slow.
         if (itry == ntries)
            printf ( "\n\nFinal... n=%d  reps=%d  type=%d  param=%.4lf  ptie=%.4lf\n",
                     nsamps, ntries, itype, param, ptie) ;
         printf ( "\nSplits size  Est. MI  True MI   Bias   StdErr     Lower   Upper   Outside" ) ;
         for (i=0 ; i<nsplits ; i++) {
            printf ( "\n%4d %6d %8.4lf %8.4lf %7.4lf %7.4lf  %8.4lf %7.4lf   %6.3lf",
               splits[i], nsamps/splits[i], total[i]/itry, correctMI[i],
               bias[i]/itry, sqrt ( std_err[i]/itry ),
               lower0[i]/itry, upper0[i]/itry, outside0[i]/itry ) ;
            printf ( "\n                                               %8.4lf %7.4lf   %6.3lf",
               lower1[i]/itry, upper1[i]/itry, outside1[i]/itry ) ;
            }
         }

      if (_kbhit ()) {         // Has the user pressed a key?
         if (_getch() == 27)   // The ESCape key?
            break ;
         }

      } // For all tries

   free ( x ) ;
   free ( y ) ;
   free ( xbins ) ;
   free ( ybins ) ;
   return EXIT_SUCCESS ;
}