Exemplo n.º 1
0
/*
   qsortd - Sort data[first..last] (both bounds inclusive) into ascending order, in place

   first - Index of the first element of the range to sort
   last  - Index of the last element of the range to sort
   data  - Array holding the values; only data[first..last] is touched

   Recursive quicksort that partitions around the value found in the middle
   of the range.  A range holding fewer than two elements (this includes an
   empty range, last < first) is returned untouched without reading data.
*/
void qsortd ( int first , int last , double *data )
{
   int lower, upper ;
   double ftemp, split ;

   if (first >= last)      // Zero or one element: nothing to do, and an empty
      return ;             // range must not read outside the caller's array

   split = data[first + (last - first) / 2] ;  // Midpoint without overflowing first+last
   lower = first ;
   upper = last ;

   do {
      while ( split > data[lower] )   // Skip elements already on the correct (low) side
         ++lower ;
      while ( split < data[upper] )   // Skip elements already on the correct (high) side
         --upper ;
      if (lower == upper) {           // Both scans met on a pivot-valued element
         ++lower ;
         --upper ;
         }
      else if (lower < upper) {       // Out-of-place pair: exchange and move inward
         ftemp = data[lower] ;
         data[lower++] = data[upper] ;
         data[upper--] = ftemp ;
         }
      } while ( lower <= upper ) ;

   if (first < upper)                 // Left part has at least two elements
      qsortd ( first , upper , data ) ;
   if (lower < last)                  // Right part has at least two elements
      qsortd ( lower , last , data ) ;
}
Exemplo n.º 2
0
/**************************************************
Sorts a table of scores a[N][2] with quicksort.
INPUT: a[][0] must hold the N scores (double)
       a[][1] must hold the index of each score
       (lo0, hi0): lowest and highest row indices
OUTPUT:
       a[][1] will hold the score indices in order of
       increasing score (this relies on swapQSd, defined
       elsewhere, exchanging whole rows -- TODO confirm)
Use qsortd2 to work with 2 vectors instead of the table.
**************************************************/
void qsortd(double **a, int lo0, int hi0)
{
	int left, right;
	double pivot;

	/* Fewer than two rows: nothing to sort. */
	if (hi0 <= lo0)
		return;

	left = lo0;
	right = hi0;
	pivot = a[(lo0 + hi0) / 2][0];	/* score of the middle row */

	while (left <= right)
	{
		/* Advance each cursor past rows already on the correct side,
		   never stepping outside [lo0, hi0]. */
		while (left < hi0 && a[left][0] < pivot)
			++left;
		while (right > lo0 && a[right][0] > pivot)
			--right;

		if (left > right)
			break;		/* cursors crossed: partition finished */

		swapQSd(a, left, right);
		++left;
		--right;
	}

	/* Recurse into each side that still holds more than one row. */
	if (lo0 < right)
		qsortd(a, lo0, right);
	if (left < hi0)
		qsortd(a, left, hi0);
}
int rr (
   int type ,         // Type of study (SCREEN_RR_? in CONST.H): continuous, tails, discrete)
   int npred ,        // Number of predictors
   int *preds ,       // Their indices are here
   int targetvar ,    // Index of target variable
   int nbins_pred ,   // Number of predictor bins
   int nbins_target , // Number of target bins, 0 for 2 sign-based bins
   double tail_frac , // Tail fraction
   int mcpt_type ,    // 1=complete, 2=cyclic
   int mcpt_reps ,    // Number of MCPT replications, <=1 for no MCPT
   int max_pred       // Max number of predictors in optimal subset
   )
{
   int i, j, k, n, ret_val, ivar, irep, varnum, max_threads, bins_dim ;
   int *index, *stepwise_mcpt_count, *solo_mcpt_count, *stepwise_ivar, *original_stepwise_ivar ;
   int *pred_bin, *redun_pred_bin, *target_bin, *bin_counts ;
   int *work_bin, nkept, best_ivar, *which_preds, *tail_n, *target_bin_ptr ;
   double *casework, *sorted, *mutual, *pred_thresholds, *target_thresholds, *target, *work_target ;
   double *crit, *relevance, *original_relevance, *current_crits, *sorted_crits, best_crit, dtemp ;
   double *pred_bounds, *target_bounds, *pred_marginal, *redun_pred_marginal, *target_marginal ;
   double *stepwise_crit, *original_stepwise_crit ;
   double sum_relevance, *original_sum_relevance, *sum_redundancy ;
   char msg[4096], msg2[4096] ;

   casework = NULL ;
   mutual = NULL ;
   index = NULL ;
   pred_thresholds = NULL ;
   target_thresholds = NULL ;
   pred_bin = NULL ;
   redun_pred_bin = NULL ;
   redun_pred_marginal = NULL ;
   work_bin = NULL ;
   target_bin = NULL ;
   bin_counts = NULL ;
   target = NULL ;
   tail_n = NULL ;

   if (max_pred > npred)   // Watch out for careless user
      max_pred = npred ;

   ret_val = 0 ;

   max_threads = MAX_THREADS ;

/*
   Print header
*/

   audit ( "" ) ;
   audit ( "" ) ;
   audit ( "******************************************************************************" ) ;
   audit ( "*                                                                            *" ) ;
   audit ( "* Computing relevance minus redundancy for optimal predictor subset          *" ) ;
   if (type == SCREEN_RR_CONTINUOUS)
      audit ( "*      Predictors and target are continuous                                  *" ) ;
   else if (type == SCREEN_RR_TAILS) {
      sprintf_s ( msg, "*   %5.3lf predictor tails used                                             *", tail_frac ) ;
      audit ( msg ) ;
      sprintf_s ( msg, "*      %2d target bins                                                        *", nbins_target ) ;
      audit ( msg ) ;
      }
   else if (type == SCREEN_RR_DISCRETE) {
      sprintf_s ( msg, "*      %2d predictor bins                                                     *", nbins_pred ) ;
      audit ( msg ) ;
      sprintf_s ( msg, "*      %2d target bins                                                        *", nbins_target ) ;
      audit ( msg ) ;
      }
   sprintf_s ( msg, "*   %5d predictor candidates                                               *", npred ) ;
   audit ( msg ) ;
   sprintf_s ( msg, "* %7d best predictors will be printed                                    *", max_pred ) ;
   audit ( msg ) ;
   if (mcpt_reps > 1) {
      if (mcpt_type == 1)
         sprintf_s ( msg, "*   %5d replications of complete Monte-Carlo Permutation Test              *", mcpt_reps ) ;
      else if (mcpt_type == 2)
         sprintf_s ( msg, "*   %5d replications of cyclic Monte-Carlo Permutation Test                *", mcpt_reps ) ;
      audit ( msg ) ;
      }
   else {
      sprintf_s ( msg, "*         No Monte-Carlo Permutation Test                                    *" ) ;
      audit ( msg ) ;
      }
   audit ( "*                                                                            *" ) ;
   audit ( "******************************************************************************" ) ;


/*
   Allocate memory needed for all types (CONTINUOUS, TAILS, DISCRETE)
*/

   casework = (double *) malloc ( 2 * n_cases * sizeof(double) ) ;  // Pred, sorted
   sorted = casework + n_cases ;

   mutual = (double *) malloc ( 10 * npred * sizeof(double) ) ;
   crit = mutual + npred ;
   current_crits = crit + npred ;
   sorted_crits = current_crits + npred ;
   stepwise_crit = sorted_crits + npred ;
   original_stepwise_crit = stepwise_crit + npred ;
   relevance = original_stepwise_crit + npred ;
   original_relevance = relevance + npred ;
   sum_redundancy = original_relevance + npred ;
   original_sum_relevance = sum_redundancy + npred ;

   index = (int *) malloc ( 6 * npred * sizeof(int) ) ;
   stepwise_mcpt_count = index + npred ;
   solo_mcpt_count = stepwise_mcpt_count + npred ;
   which_preds = solo_mcpt_count + npred ;
   stepwise_ivar = which_preds + npred ;
   original_stepwise_ivar = stepwise_ivar + npred ;

   if (casework == NULL  ||  mutual == NULL  ||  index == NULL) {
      audit ( "ERROR: Insufficient memory for Relevance minus Redundancy" ) ;
      ret_val = ERROR_INSUFFICIENT_MEMORY ;
      goto FINISH ;
      }


/*
   For CONTINUOUS, allocate and save target
*/

   if (type == SCREEN_RR_CONTINUOUS) {
      target = (double *) malloc ( 2 * n_cases * sizeof(double) ) ;
      work_target = target + n_cases ;
      if (target == NULL) {
         audit ( "ERROR: Insufficient memory for Relevance minus Redundancy" ) ;
         ret_val = ERROR_INSUFFICIENT_MEMORY ;
         goto FINISH ;
         }
      for (i=0 ; i<n_cases ; i++)             // Extract target from database
         target[i] = database[i*n_vars+targetvar] ;
      }


/*
   For binning types (TAILS, DISCRETE), allocate that memory and compute all bin information
*/

   else if (type == SCREEN_RR_TAILS  ||  type == SCREEN_RR_DISCRETE) {
      pred_thresholds = (double *) malloc ( 2 * nbins_pred * npred * sizeof(double) ) ; // pred_thresholds, pred_marginal
      pred_marginal = pred_thresholds + npred * nbins_pred ; // Not needed for computation but nice to print for user
      pred_bin = (int *) malloc ( npred * n_cases * sizeof(int) ) ;
      work_bin = (int *) malloc ( n_cases * sizeof(int) ) ;

      if (type == SCREEN_RR_TAILS) {
         assert ( nbins_pred == 2 ) ;
         k = 3 ;  // We go trinary for redundancy
         }
      else
         k = nbins_pred ;

      if (k >= nbins_target)
         bins_dim = k * k ;
      else
         bins_dim = k * nbins_target ;
      bin_counts = (int *) malloc ( max_threads * bins_dim * sizeof(int) ) ;

      tail_n = (int *) malloc ( npred * sizeof(int) ) ;  // We use tail_n[0] if DISCRETE, so we need it for eitherz

      if (type == SCREEN_RR_TAILS) {
         target_thresholds = (double *) malloc ( 2 * nbins_target * npred * sizeof(double) ) ; // target_thresholds, target_marginal
         target_marginal = target_thresholds + nbins_target * npred ;
         target_bin = (int *) malloc ( npred * n_cases * sizeof(int) ) ; // Target bin separate for each predictor
         redun_pred_bin = (int *) malloc ( npred * n_cases * sizeof(int) ) ; // Trinary for redundancy calculation
         redun_pred_marginal = (double *) malloc ( 3 * npred * sizeof(double) ) ; // Trinary
         }
      else if (type == SCREEN_RR_DISCRETE) {
         target_thresholds = (double *) malloc ( 2 * nbins_target * sizeof(double) ) ; // target_thresholds, target_marginal
         target_marginal = target_thresholds + nbins_target ;
         target_bin = (int *) malloc ( n_cases * sizeof(int) ) ; // Target bin the same for all predictors
         }

      if (pred_thresholds == NULL  ||  target_thresholds == NULL  ||
          pred_bin == NULL  ||  work_bin == NULL  ||  target_bin == NULL  ||  bin_counts == NULL) {
         audit ( "ERROR: Insufficient memory for Relevance minus Redundancy" ) ;
         ret_val = ERROR_INSUFFICIENT_MEMORY ;
         goto FINISH ;
         }

/*
   Make an initial pass through the data to find predictor thresholds and
   permanently save bin indices for predictors and target.
   If tails-only, we must save the associated target subset indices, separately for each predictor.
   If not tails only, do target when ivar=-1.
*/

      for (ivar=-1 ; ivar<npred ; ivar++) {
         if (ivar == -1) {                   // If this is target pass
            if (type == SCREEN_RR_TAILS) // But user specified tails only
               continue ;                    // then we process the targets separately for each predictor's subset
            }
         else
            varnum = preds[ivar] ;

         if (user_pressed_escape()) {
            audit ( "ERROR: User pressed ESCape during RELEVANCE MINUS REDUNDANCY" ) ;
            ret_val = ERROR_ESCAPE ;
            goto FINISH ;
            }

         // At this point, one of three things holds:
         //   Case 1: ivar=-1 (which implies not TAILS): This is the target
         //   Case 2: ivar>=0, not TAILS: This is a predictor
         //   Case 3: ivar>=0, TAILS: This is a predictor AND we must save the corresponding target

         // ------> Case 1: ivar=-1 (which implies not TAILS): This is the target

         if (ivar == -1) {
            for (i=0 ; i<n_cases ; i++)               // Extract target from database
               casework[i] = database[i*n_vars+targetvar] ;
            target_bounds = target_thresholds ;
            k = nbins_target ;
            partition ( n_cases , casework , &k , target_bounds , target_bin ) ;
            if (k <nbins_target) {
               sprintf_s ( msg, "ERROR: Numerous ties reduced target bins to %d", k ) ;
               audit ( msg ) ;
               ret_val = ERROR_SYNTAX ;
               goto FINISH ;
               }
            assert ( k == nbins_target ) ;
            tail_n[0] = n_cases ;       // Later code is simplified if we save this as if TAILS
            }

         // ------> Case 2: ivar>=0, not TAILS: This is a predictor

         else if (ivar >= 0  &&  type != SCREEN_RR_TAILS) {
            for (i=0 ; i<n_cases ; i++)               // Extract predictor from database
               casework[i] = database[i*n_vars+varnum] ;
            pred_bounds = pred_thresholds + ivar * nbins_pred ;
            k = nbins_pred ;
            partition ( n_cases , casework , &k , pred_bounds , pred_bin+ivar*n_cases ) ;
            if (k <nbins_pred) {
               sprintf_s ( msg, "ERROR: Numerous ties reduced predictor %s bins to %d", var_names[preds[ivar]], k ) ;
               audit ( msg ) ;
               ret_val = ERROR_SYNTAX ;
               goto FINISH ;
               }
            assert ( k == nbins_pred ) ;
            }

         // ------> Case 3: ivar>=0, TAILS: This is a predictor AND we must save the corresponding target

         else if (ivar >= 0  &&  type == SCREEN_RR_TAILS) {
            // Compute predictor bounds per tail fraction
            for (i=0 ; i<n_cases ; i++)               // Extract predictor from database
               casework[i] = database[i*n_vars+varnum] ;
            qsortd ( 0 , n_cases-1 , casework ) ;
            pred_bounds = pred_thresholds + ivar * nbins_pred ;
            k = (int) (tail_frac * (n_cases+1)) - 1 ;
            if (k < 0)
               k = 0 ;
            pred_bounds[0] = casework[k] ;
            pred_bounds[1] = casework[n_cases-1-k] ;
            // Compute and save predictor bin indices; Also save target for soon computing its bounds and indices
            n = 0 ;
            for (i=0 ; i<n_cases ; i++) {
               if (database[i*n_vars+varnum] <= pred_bounds[0]) {
                  pred_bin[ivar*n_cases+n] = 0 ;
                  redun_pred_bin[ivar*n_cases+i] = 0 ;  // Need this for intra-predictor redundancy
                  }
               else if (database[i*n_vars+varnum] >= pred_bounds[1]) {
                  pred_bin[ivar*n_cases+n] = 1 ;
                  redun_pred_bin[ivar*n_cases+i] = 1 ;
                  }
               else {
                  redun_pred_bin[ivar*n_cases+i] = 2 ;
                  continue ;
                  }
               casework[n] = database[i*n_vars+targetvar] ;
               ++n ;
               }
            tail_n[ivar] = n ;

            // Compute the target bounds based on this 'predictor tail' subset of the entire dataset
            target_bounds = target_thresholds + ivar * nbins_target ;
            k = nbins_target ;
            partition ( n , casework , &k , target_bounds , target_bin+ivar*n_cases ) ;
            if (k <nbins_target) {
               sprintf_s ( msg, "ERROR: Numerous ties reduced target bins to %d", k ) ;
               audit ( msg ) ;
               ret_val = ERROR_SYNTAX ;
               goto FINISH ;
               }
            }

         else
            assert ( 1 == 0 ) ;

         } // For ivar (reading each variable)


/*
   All thresholds (predictor and target) are computed and saved.
   The predictor and target bin indices are also saved.
   If not TAILS, the saved target bin indices are based on the entire dataset,
   and the saved target thresholds are similarly for the entire dataset.
   But if TAILS, each predictor candidate will have its own target subset
   and thresholds corresponding to that subset.

   Print the thresholds for the user's edification
*/

      audit ( "" ) ;
      audit ( "" ) ;
      audit ( "The bounds that define bins are now shown" ) ;
      audit ( "" ) ;

      if (type == SCREEN_RR_TAILS) {
         audit ( "Target bounds are shown (after :) separately for each predictor candidate" ) ;
         audit ( "" ) ;
         audit ( "       Variable  Predictor bounds... : Target bounds" ) ;
         audit ( "" ) ;
         }

      else {
         audit ( "Target bounds are based on the entire dataset..." ) ;
         sprintf_s ( msg , "%12.5lf", target_thresholds[0] ) ;
         for (i=1 ; i<nbins_target-1 ; i++) {
            sprintf_s ( msg2 , "  %12.5lf", target_thresholds[i] ) ;
            strcat_s ( msg , msg2 ) ;
            }

         audit ( msg ) ;
         audit ( "" ) ;
         audit ( "       Variable  Bounds..." ) ;
         audit ( "" ) ;
         }

      for (ivar=0 ; ivar<npred ; ivar++) {
         pred_bounds = pred_thresholds + ivar * nbins_pred ;
         sprintf_s ( msg, "%15s  %12.5lf", var_names[preds[ivar]], pred_bounds[0] ) ;
         k = (type == SCREEN_RR_TAILS) ? 2 : nbins_pred-1 ;
         for (i=1 ; i<k ; i++) {
            sprintf_s ( msg2 , "  %12.5lf", pred_bounds[i] ) ;
            strcat_s ( msg , msg2 ) ;
            }
         if (type == SCREEN_RR_TAILS) {
            target_bounds = target_thresholds + ivar * nbins_target ;
            sprintf_s ( msg2 , "  :  %12.5lf", target_bounds[0] ) ;
            strcat_s ( msg , msg2 ) ;
            for (i=1 ; i<nbins_target-1 ; i++) {
               sprintf_s ( msg2 , "  %12.5lf", target_bounds[i] ) ;
               strcat_s ( msg , msg2 ) ;
               }
            } // If TAILS
         audit ( msg ) ;
         } // For all predictors

/*
   Compute marginals
*/

      for (ivar=0 ; ivar<npred ; ivar++) {

         for (i=0 ; i<nbins_pred ; i++)
            pred_marginal[ivar*nbins_pred+i] = 0.0 ;

         if (ivar==0  ||  type == SCREEN_RR_TAILS) {
            for (i=0 ; i<nbins_target ; i++)
               target_marginal[ivar*nbins_target+i] = 0.0 ;
            }

         for (i=0 ; i<n_cases ; i++) {
            ++pred_marginal[ivar*nbins_pred+pred_bin[ivar*n_cases+i]] ;
            if (type == SCREEN_UNIVAR_TAILS) {
               ++target_marginal[ivar*nbins_target+target_bin[ivar*n_cases+i]] ;
               if (i == tail_n[ivar]-1)
                  break ;
               }
            else if (ivar == 0)                           // Do target just once
               ++target_marginal[target_bin[i]] ;
            } // For all cases

         if (type == SCREEN_RR_TAILS) {  // Trinary
            for (i=0 ; i<3 ; i++)
               redun_pred_marginal[ivar*3+i] = 0.0 ;
            for (i=0 ; i<n_cases ; i++)
               ++redun_pred_marginal[ivar*3+redun_pred_bin[ivar*n_cases+i]] ;
            }
         }

      for (ivar=0 ; ivar<npred ; ivar++) {  // Divide counts by number of cases to get marginal

         if (type == SCREEN_UNIVAR_TAILS) {
            assert ( nbins_pred == 2 ) ;
            for (i=0 ; i<nbins_pred ; i++)
               pred_marginal[ivar*nbins_pred+i] /= tail_n[ivar] ;
            for (i=0 ; i<3 ; i++)
               redun_pred_marginal[ivar*3+i] /= n_cases ;
            }
         else {
            for (i=0 ; i<nbins_pred ; i++)
               pred_marginal[ivar*nbins_pred+i] /= n_cases ;
            }

         if (ivar==0  ||  type == SCREEN_UNIVAR_TAILS) {
            for (i=0 ; i<nbins_target ; i++)
               target_marginal[ivar*nbins_target+i] /= tail_n[ivar] ;
            }
         }


/*
   Print the marginals for the user's edification
*/

      audit ( "" ) ;
      audit ( "" ) ;
      audit ( "The marginal distributions are now shown." ) ;
      audit ( "If the data is continuous, the marginals will be nearly equal." ) ;
      audit ( "Widely unequal marginals indicate potentially problematic ties." ) ;
      audit ( "" ) ;

      if (type == SCREEN_UNIVAR_TAILS) {
         audit ( "Target marginals are shown (after :) separately for each predictor candidate" ) ;
         audit ( "" ) ;
         audit ( "       Variable  Predictor marginals... : Target marginals" ) ;
         audit ( "" ) ;
         }

      else {
         audit ( "Target marginals are based on the entire dataset..." ) ;
         sprintf_s ( msg , "%12.5lf", target_marginal[0] ) ;
         for (i=1 ; i<nbins_target ; i++) {
            sprintf_s ( msg2 , "  %12.5lf", target_marginal[i] ) ;
            strcat_s ( msg , msg2 ) ;
            }

         audit ( msg ) ;
         audit ( "" ) ;
         audit ( "       Variable    Marginal..." ) ;
         audit ( "" ) ;
         }

      for (ivar=0 ; ivar<npred ; ivar++) {
         sprintf_s ( msg, "%15s  %12.5lf", var_names[preds[ivar]], pred_marginal[ivar*nbins_pred+0] ) ;
         for (i=1 ; i<nbins_pred ; i++) {
            sprintf_s ( msg2 , "  %12.5lf", pred_marginal[ivar*nbins_pred+i] ) ;
            strcat_s ( msg , msg2 ) ;
            }
         if (type == SCREEN_UNIVAR_TAILS) {
            sprintf_s ( msg2 , "  :  %12.5lf", target_marginal[ivar*nbins_target+0] ) ;
            strcat_s ( msg , msg2 ) ;
            for (i=1 ; i<nbins_target ; i++) {
               sprintf_s ( msg2 , "  %12.5lf", target_marginal[ivar*nbins_target+i] ) ;
               strcat_s ( msg , msg2 ) ;
               }
            } // If TAILS
         audit ( msg ) ;
         } // For all predictors

      disallow_menu = 0 ;
      mouse_cursor_arrow () ;
      end_progbar () ;
      } // If binning type (TAILS, DISCRETE)



/*
--------------------------------------------------------------------------------

   Outer-most loop does MCPT replications

--------------------------------------------------------------------------------
*/


   if (mcpt_reps < 1)
      mcpt_reps = 1 ;

   for (irep=0 ; irep<mcpt_reps ; irep++) {

/*
   Shuffle target if in permutation run (irep>0)
*/

      if (irep) {                  // If doing permuted runs, shuffle

         if (mcpt_type == 1) {      // Complete
            if (type == SCREEN_UNIVAR_CONTINUOUS) {
               i = n_cases ;        // Number remaining to be shuffled
               while (i > 1) {      // While at least 2 left to shuffle
                  j = (int) (unifrand_fast () * i) ;
                  if (j >= i)
                     j = i - 1 ;
                  dtemp = target[--i] ;
                  target[i] = target[j] ;
                  target[j] = dtemp ;
                  }
               } // If not using bins
            else if (type == SCREEN_UNIVAR_TAILS) {   // Each predictor has its own target subset
               for (ivar=0 ; ivar<npred ; ivar++) {
                  target_bin_ptr = target_bin + ivar * n_cases ;
                  i = tail_n[ivar] ;           // Number remaining to be shuffled
                  while (i > 1) {              // While at least 2 left to shuffle
                     j = (int) (unifrand_fast () * i) ;
                     if (j >= i)
                        j = i - 1 ;
                     k = target_bin_ptr[--i] ;
                     target_bin_ptr[i] = target_bin_ptr[j] ;
                     target_bin_ptr[j] = k ;
                     }
                  }
               } // Else if TAILS
            else {
               i = n_cases ;          // Number remaining to be shuffled
               while (i > 1) {        // While at least 2 left to shuffle
                  j = (int) (unifrand_fast () * i) ;
                  if (j >= i)
                     j = i - 1 ;
                  k = target_bin[--i] ;
                  target_bin[i] = target_bin[j] ;
                  target_bin[j] = k ;
                  }
               } // Else discrete using entire dataset
            } // Type 1, Complete
         else if (mcpt_type == 2) { // Cyclic
            if (type == SCREEN_UNIVAR_CONTINUOUS) {
               j = (int) (unifrand_fast () * n_cases) ;
               if (j >= n_cases)
                  j = n_cases - 1 ;
               for (i=0 ; i<n_cases ; i++)
                  casework[i] = target[(i+j)%n_cases] ;
               for (i=0 ; i<n_cases ; i++)
                  target[i] = casework[i]  ;

               } // If continuous
            else if (type == SCREEN_UNIVAR_TAILS) {   // Each predictor has its own target subset
               for (ivar=0 ; ivar<npred ; ivar++) {
                  target_bin_ptr = target_bin + ivar * n_cases ;
                  k = tail_n[ivar] ;
                  j = (int) (unifrand_fast () * k) ;
                  if (j >= k)
                     j = k - 1 ;
                  for (i=0 ; i<k ; i++)
                     work_bin[i] = target_bin_ptr[(i+j)%k] ;
                  for (i=0 ; i<k ; i++)
                     target_bin_ptr[i] = work_bin[i]  ;
                  }
               } // Else if TAILS
            else {
               j = (int) (unifrand_fast () * n_cases) ;
               if (j >= n_cases)
                  j = n_cases - 1 ;
               for (i=0 ; i<n_cases ; i++)
                  work_bin[i] = target_bin[(i+j)%n_cases] ;
               for (i=0 ; i<n_cases ; i++)
                  target_bin[i] = work_bin[i]  ;
               } // Else discrete using entire dataset
            } // Type 2, Cyclic

         } // If in permutation run (irep > 0)


/*
-----------------------------------------------------------------------------------

   First step: Compute and save criterion for all individual candidates

-----------------------------------------------------------------------------------
*/

      for (i=0 ; i<npred ; i++)   // We'll test all candidates
         which_preds[i] = i ;

      if (type == SCREEN_RR_TAILS)
         ret_val = rr_threaded ( type , database , n_vars , preds , NULL ,
                                 mcpt_reps , max_threads , n_cases , tail_n , npred , which_preds ,
                                 nbins_pred , pred_bin , pred_marginal ,
                                 nbins_target , target_bin , target_marginal ,
                                 crit , bins_dim , bin_counts ) ;
      else
         ret_val = rr_threaded ( type , database , n_vars , preds , target ,
                                 mcpt_reps , max_threads , n_cases , NULL , npred , which_preds ,
                                 nbins_pred , pred_bin , pred_marginal ,
                                 nbins_target , target_bin , target_marginal ,
                                 crit , bins_dim , bin_counts ) ;

      if (user_pressed_escape()  &&  ret_val == 0)
         ret_val = ERROR_ESCAPE ;

      if (ret_val) {
         audit ( "ERROR: User pressed ESCape during RELEVANCE MINUS REDUNDANCY" ) ;
         goto FINISH ;
         }

/*
   The individual mutual information for each predictor has been computed and saved in crit.
   Update 'best' information for this replication.
   Print a sorted table if this is the first replication.  Else update MCPT count.
*/

      for (ivar=0 ; ivar<npred ; ivar++) {
         relevance[ivar] = crit[ivar] ;   // Will need this for Step 2, addition of more predictors
         if (ivar == 0  ||  crit[ivar] > best_crit) {
            best_crit = crit[ivar] ;
            best_ivar = ivar ;
            }
         }

      stepwise_crit[0] = best_crit ;  // Criterion for first var is largest MI
      stepwise_ivar[0] = best_ivar ;  // It's this candidate
      sum_relevance = best_crit ;

      if (irep == 0) {            // Original, unpermuted data

         original_stepwise_crit[0] = best_crit ;  // Criterion for first var is largest MI
         original_stepwise_ivar[0] = best_ivar ;  // It's this candidate
         original_sum_relevance[0] = sum_relevance ;
         stepwise_mcpt_count[0] = 1 ;             // Initialize cumulative MCPT

         // We need original_relevance for printing final table.  Other crits are just for this table.
         for (ivar=0 ; ivar<npred ; ivar++) {
            index[ivar] = ivar ;
            original_relevance[ivar] = sorted_crits[ivar] = current_crits[ivar] = crit[ivar] ;
            solo_mcpt_count[ivar] = 1 ;           // Initialize solo MCPT
            }
         qsortdsi ( 0 , npred-1 , sorted_crits , index ) ;
         audit ( "" ) ;
         audit ( "" ) ;
         sprintf_s ( msg, "Initial candidates, in order of decreasing mutual information with %s", var_names[targetvar] ) ;
         audit ( msg ) ;
         audit ( "" ) ;
         audit ( "       Variable         MI" ) ;
         audit ( "" ) ;
         for (i=npred-1 ; i>=0 ; i--) {
            k = index[i] ;
            sprintf_s ( msg, "%15s %12.4lf",
                      var_names[preds[k]], current_crits[k] ) ;
            audit ( msg ) ;
            }
         } // If irep=0 (original, unpermuted run)

      else {                                     // Count for MCPT
         if (sum_relevance >= original_sum_relevance[0])
            ++stepwise_mcpt_count[0] ;
         for (ivar=0 ; ivar<npred ; ivar++) {
            if (relevance[ivar] >= original_relevance[ivar])
               ++solo_mcpt_count[ivar] ;
            }
         } // Permuted


/*
-----------------------------------------------------------------------------------

   Second step: Iterate to add more candidates

   Note that redundancy of a candidate can change as predictors are added.
   This is because the kept set is increasing, so sum_redundancy changes.

-----------------------------------------------------------------------------------
*/

      for (i=0 ; i<npred ; i++)
         sum_redundancy[i] = 0.0 ; // sum_redundancy[i] is the total redundancy of candidate i with kept set

      for (nkept=1 ; nkept<max_pred ; nkept++) {  // Main outermost loop

/*
   Print candidates kept so far (if in unpermuted rep)
*/

         if (irep == 0) {        // Original, unpermuted
            audit ( "" ) ;
            audit ( "" ) ;
            audit ( "Predictors so far   Relevance   Redundancy   Criterion" ) ;
            audit ( "" ) ;
            for (i=0 ; i<nkept ; i++) {
               k = stepwise_ivar[i] ;
               // Cannot print sum_redundancy/nkept here because sum froze but nkept keeps increasing
               sprintf_s ( msg, "%15s %12.4lf %12.4lf %12.4lf",
                         var_names[preds[k]], relevance[k], relevance[k] - stepwise_crit[i], stepwise_crit[i] ) ;
               audit ( msg ) ;
               }
            }

/*
   Build in which_preds the candidates not yet selected
*/

         k = 0 ;                           // Candidate vector is all except those already kept
         for (i=0 ; i<npred ; i++) {
            for (j=0 ; j<nkept ; j++) {
               if (stepwise_ivar[j] == i)
                  break ;
               }
            if (j == nkept)
               which_preds[k++] = i ;
            }
         assert ( k == npred - nkept ) ;

/*
   Compute the MI of the most recently added predictor with each remaining candidate
*/

         if (user_pressed_escape()) {
            ret_val = ERROR_ESCAPE ;
            audit ( "ERROR: User pressed ESCape or other serious error during RELEVANCE MINUS REDUNDANCY" ) ;
            goto FINISH ;
            }

         k = stepwise_ivar[nkept-1] ;   // Index in preds of most recently added candidate
         if (type == SCREEN_RR_TAILS)  // redun_pred_? is trinary
            ret_val = rr_threaded ( type , database , n_vars , preds , NULL ,
                                    mcpt_reps , max_threads , n_cases , NULL , npred-nkept , which_preds ,
                                    3 , redun_pred_bin , redun_pred_marginal ,
                                    3 , redun_pred_bin+k*n_cases , redun_pred_marginal+k*3 ,
                                    crit , bins_dim , bin_counts ) ;
         else {
            if (type == SCREEN_RR_CONTINUOUS) {
               for (i=0 ; i<n_cases ; i++)
                  casework[i] = database[i*n_vars+preds[k]] ;
               }
            ret_val = rr_threaded ( type , database , n_vars , preds , casework ,
                                    mcpt_reps , max_threads , n_cases , NULL , npred-nkept , which_preds ,
                                    nbins_pred , pred_bin , pred_marginal ,
                                    nbins_pred , pred_bin+k*n_cases , pred_marginal+k*nbins_pred ,
                                    crit , bins_dim , bin_counts ) ;
            }

         if (user_pressed_escape()  &&  ret_val == 0)
            ret_val = ERROR_ESCAPE ;

         if (ret_val) {
            audit ( "ERROR: User pressed ESCape or other serious error during RELEVANCE MINUS REDUNDANCY" ) ;
            goto FINISH ;
            }
         
/*
   The redundancy of each remaining candidate with the most recently added predictor is now in crit.
   Cumulate the sum of redundancy.
   Then compute the criteria, sorting and printing if this is the unpermuted replication.
*/

         for (i=0 ; i<npred-nkept ; i++) {
            k = which_preds[i] ;   // Index in preds of this candidate
            sum_redundancy[k] += crit[i] ;
            index[i] = k ;
            sorted_crits[i] = current_crits[i] = relevance[k] - sum_redundancy[k] / nkept ;
            if (i == 0  ||  current_crits[i] > best_crit) {
               best_crit = current_crits[i] ;
               best_ivar = k ;
               }
            }

         stepwise_crit[nkept] = best_crit ;
         stepwise_ivar[nkept] = best_ivar ;
         sum_relevance += relevance[best_ivar] ;

         if (irep == 0) {        // Original, unpermuted
            original_stepwise_crit[nkept] = best_crit ;
            original_stepwise_ivar[nkept] = best_ivar ;
            original_sum_relevance[nkept] = sum_relevance ;
            stepwise_mcpt_count[nkept] = 1 ;

            qsortdsi ( 0 , npred-nkept-1 , sorted_crits , index ) ;
            audit ( "" ) ;
            audit ( "" ) ;
            audit ( "Additional candidates, in order of decreasing relevance minus redundancy" ) ;
            audit ( "" ) ;
            audit ( "       Variable     Relevance   Redundancy   Criterion" ) ;
            audit ( "" ) ;
            for (i=npred-nkept-1 ; i>=0 ; i--) {
               k = index[i] ;
               sprintf_s ( msg, "%15s %12.4lf %12.4lf %12.4lf",
                         var_names[preds[k]], relevance[k], sum_redundancy[k] / nkept,
                         relevance[k] - sum_redundancy[k] / nkept ) ;
               audit ( msg ) ;
               }
            } // If irep=0 (original, unpermuted run)

         else {                                     // Count for MCPT
            if (sum_relevance >= original_sum_relevance[nkept])
               ++stepwise_mcpt_count[nkept] ;
            } // Permuted

            
         } // Second step (for nkept): Iterate to add predictors to kept set

      } // For all MCPT replications


/*
--------------------------------------------------------------------------------

   All computation is finished.  Print.

--------------------------------------------------------------------------------
*/

   audit ( "" ) ;
   audit ( "" ) ;

/*
   Print final list of candidates and p-values
*/

   audit ( "" ) ;
   audit ( "" ) ;
   sprintf_s ( msg, "----------> Final results predicting %s <----------", var_names[targetvar] ) ;
   audit ( msg ) ;
   audit ( "" ) ;
   if (mcpt_reps > 1)
      audit ( "Final predictors    Relevance   Redundancy   Criterion    Solo pval  Group pval" ) ;
   else
      audit ( "Final predictors    Relevance   Redundancy   Criterion" ) ;
   audit ( "" ) ;
   for (i=0 ; i<nkept ; i++) {
      // Cannot print sum_redundancy/nkept here because sum froze but nkept keeps increasing
      k = original_stepwise_ivar[i] ;
      if (mcpt_reps > 1)
         sprintf_s ( msg, "%15s %12.4lf %12.4lf %12.4lf    %8.3lf    %8.3lf",
                   var_names[preds[k]], original_relevance[k], original_relevance[k] - original_stepwise_crit[i], original_stepwise_crit[i],
                   (double) solo_mcpt_count[k] / (double) mcpt_reps,
                   (double) stepwise_mcpt_count[i] / (double) mcpt_reps ) ;
      else
         sprintf_s ( msg, "%15s %12.4lf %12.4lf %12.4lf",
                   var_names[preds[k]], original_relevance[k], original_relevance[k] - original_stepwise_crit[i], original_stepwise_crit[i] ) ;
      audit ( msg ) ;
      }

/*
   Finished.  Clean up and exit.
*/

FINISH:

   if (casework != NULL)
      free ( casework ) ;
   if (mutual != NULL)
      free ( mutual ) ;
   if (index != NULL)
      free ( index ) ;
   if (pred_thresholds != NULL)
      free ( pred_thresholds ) ;
   if (target_thresholds != NULL)
      free ( target_thresholds ) ;
   if (pred_bin != NULL)
      free ( pred_bin ) ;
   if (redun_pred_bin != NULL)
      free ( redun_pred_bin ) ;
   if (redun_pred_marginal != NULL)
      free ( redun_pred_marginal ) ;
   if (work_bin != NULL)
      free ( work_bin ) ;
   if (target_bin != NULL)
      free ( target_bin ) ;
   if (bin_counts != NULL)
      free ( bin_counts ) ;
   if (target != NULL)
      free ( target ) ;
   if (tail_n != NULL)
      free ( tail_n ) ;
   return ret_val ;
}
Exemplo n.º 4
0
/*
   ConfConf - Monte-Carlo check of confidence in quantile and KS thresholds

   Command line:  ConfConf  ncases  pval  conf  nreps

   The program computes pessimistic quantile bounds (via quantile_conf) and
   Kolmogorov-Smirnov thresholds (via inverse_ks) for a sample of 'ncases',
   prints them, and then draws 'nreps' uniform samples to count how often
   each bound is violated.  The observed failure rates are printed next to
   the expected rate 'conf'.

   Exits with status 1 on a bad command line or if memory cannot be
   allocated; returns 0 otherwise.
*/

int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )
{
   int i, ncases, irep, nreps, m, n_lower, n_upper, n_ks2, n_ks_null, n_ks_alt ;
   double *x, pval, conf, pessimistic_lower, pessimistic_upper ;
   double ks_two, ks_one, D, Dp, Dm ;

   if (argc != 5) {
      printf ( "\nUsage: ConfConf  ncases  pval  conf  nreps" ) ;
      printf ( "\n  ncases - Number of cases in the sample" ) ;
      printf ( "\n  pval - Probability value (<0.5) for quantile test" ) ;
      printf ( "\n  conf - Desired confidence value (<0.5) for both tests" ) ;
      printf ( "\n  nreps - Number of replications" ) ;
      exit ( 1 ) ;
      }

   ncases = atoi ( argv[1] ) ;
   pval = atof ( argv[2] ) ;
   conf = atof ( argv[3] ) ;
   nreps = atoi ( argv[4] ) ;

   if (ncases < 10) {
      printf ( "\nERROR.. Must have at least 10 cases" ) ;
      exit ( 1 ) ;
      }

   // The lower limit guarantees m >= 1 below, so x[m-1] and x[ncases-m] are valid
   if (pval * ncases < 1.0  ||  pval >= 0.5) {
      printf ( "\nERROR.. Pval too small or too large" ) ;
      exit ( 1 ) ;
      }

   if (conf <= 0.0  ||  conf >= 0.5) {
      printf ( "\nERROR.. Conf must be greater than 0 and less than 0.5" ) ;
      exit ( 1 ) ;
      }

   if (nreps < 1) {
      printf ( "\nERROR.. Must have at least 1 replication" ) ;
      exit ( 1 ) ;
      }


/*
   Allocate memory and initialize
*/

   x = (double *) malloc ( ncases * sizeof(double) ) ;
   if (x == NULL) {  // Without this check the replication loop would write through NULL
      printf ( "\nERROR.. Insufficient memory" ) ;
      exit ( 1 ) ;
      }

   m = (int) (pval * ncases) ;  // Conservative order statistic for bound
   pessimistic_lower = quantile_conf ( ncases , m , conf ) ;
   pessimistic_upper = 1.0 - pessimistic_lower ;
   ks_two = inverse_ks ( ncases , 1.0 - conf ) ;       // Two-tailed test
   ks_one = inverse_ks ( ncases , 1.0 - 2.0 * conf ) ; // One-tailed test

   printf ( "\nSuppose the model predicts values near 0 for the null hypothesis" ) ;
   printf ( "\nand values near 1 for the alternative hypothesis." ) ;

   printf ( "\n\nIf the dataset represents the null hypothesis, the threshold" ) ;
   printf ( "\nfor rejecting the null at p=%.4lf is given by the %d'th order statistic.",
            pval, ncases - m + 1 ) ;
   printf ( "\nThis is a conservative estimate of the %.4lf quantile", 1.0-pval ) ;
   printf ( "\nThere is only a %.4lf chance that it will really be the %.4lf quantile or worse.",
            conf, pessimistic_upper ) ;

   printf ( "\n\nIf the dataset represents the alternative hypothesis, the threshold" ) ;
   printf ( "\nfor rejecting the alt at p=%.4lf is given by the %d'th order statistic.",
            pval, m ) ;
   printf ( "\nThis is a conservative estimate of the %.4lf quantile", pval ) ;
   printf ( "\nThere is only a %.4lf chance that it will really be the %.4lf quantile or worse.",
            conf, pessimistic_lower) ;

   printf ( "\n\nKS thresholds: two-tailed KS = %.4lf  one-tailed KS = %.4lf",
            ks_two, ks_one ) ;

/*
   Now generate nreps samples.  Verify that our required confidence level
   is observed.  Note that the fact that this test uses a uniform distribution
   does not in any way limit its applicability to uniform distributions.
   If one were to generate cases from any other reasonable distribution,
   the pessimistic quantile bounds would have to be transformed similarly.
   The result is that the inequalities below would pass or fail identically.
   We count the number of times 'disaster' happens.
   Disaster is when the order statistic used for the threshold is toward the
   inside (center) of the distribution, meaning that if this order statistic
   had been used as a threshold, more of the distribution would be outside
   the threshold than the user expected.  We expect disaster to happen with
   probability equal to the specified conf parameter.

   For the two-tailed Kolmogorov-Smirnov test, disaster is when the empirical
   CDF deviates (above or below) from the correct value by more than the
   conf-inspired value.  For the one-tailed test in which the dataset is from
   the NULL distribution, disaster is when the empirical CDF exceeds the true
   CDF, a situation that would encourage false rejection of the null hypothesis.
   This is measured by D+.  For the one-tailed test in which the dataset is from
   the ALT distribution, disaster is when the empirical CDF is less than the
   true CDF, a situation that would encourage false rejection of the alternative
   hypothesis.  This is measured by D-.
*/

   n_lower = n_upper = n_ks2 = n_ks_null = n_ks_alt = 0 ;

   for (irep=0 ; irep<nreps ; irep++) {

      for (i=0 ; i<ncases ; i++)
         x[i] = unifrand () ;
      qsortd ( 0 , ncases-1 , x ) ;   // Sorted sample, so x[k] is an order statistic

      if (x[m-1] > pessimistic_lower)        // m'th order statistic too far inside
         ++n_lower ;

      if (x[ncases-m] < pessimistic_upper)   // (ncases-m+1)'th order statistic too far inside
         ++n_upper ;

      D = ks_test ( ncases , x , &Dp , &Dm ) ;
      if (D > ks_two)
         ++n_ks2 ;
      if (Dp > ks_one)
         ++n_ks_null ;
      if (Dm > ks_one)
         ++n_ks_alt ;
      }

   printf ( "\nPoint failure (expected=%.4lf)  Lower=%.4lf  Upper=%.4lf",
            conf, (double) n_lower / nreps, (double) n_upper / nreps) ;
   printf ( "\nKS failure:  two-tailed = %.4lf  NULL = %.4lf  ALT = %.4lf",
            (double) n_ks2 / nreps, (double) n_ks_null / nreps,
            (double) n_ks_alt / nreps) ;

   free ( x ) ;
   return ( 0 ) ;
}
Exemplo n.º 5
0
/*
   MI_ONLY - Mutual information of each candidate with a dependent variable

   Command line:  MI_ONLY  datafile  n_indep  depname  nreps

   For each of the first n_indep variables in the data file, this computes
   the adaptive-partitioning mutual information with the dependent variable.
   When nreps > 1 the dependent variable is shuffled on every replication
   after the first, and the number of times a permuted criterion equals or
   exceeds the original is counted.  Results are written to MI_ONLY.LOG.

   Returns EXIT_SUCCESS, or EXIT_FAILURE (exit status 1 for a wrong argument
   count) if the command line or the input is unusable.
*/

int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )

{
   int i, j, k, nvars, ncases, irep, nreps, ivar, nties, ties ;
   int n_indep_vars, idep, icand, *index, *mcpt_max_counts, *mcpt_same_counts, *mcpt_solo_counts ;
   double *data, *work, dtemp, *save_info, criterion, *crits ;
   char filename[256], **names, depname[256] ;
   FILE *fp ;
   MutualInformationAdaptive *mi_adapt ;

/*
   Process command line parameters
*/

#if 1
   if (argc != 5) {
      printf ( "\nUsage: MI_ONLY  datafile  n_indep  depname  nreps" ) ;
      printf ( "\n  datafile - name of the text file containing the data" ) ;
      printf ( "\n             The first line is variable names" ) ;
      printf ( "\n             Subsequent lines are the data." ) ;
      printf ( "\n             Delimiters can be space, comma, or tab" ) ;
      printf ( "\n  n_indep - Number of independent vars, starting with the first" ) ;
      printf ( "\n  depname - Name of the 'dependent' variable" ) ;
      printf ( "\n            It must be AFTER the first n_indep variables" ) ;
      printf ( "\n  nreps - Number of Monte-Carlo permutations, including unpermuted" ) ;
      exit ( 1 ) ;
      }

   // The destination buffers are fixed size, so refuse arguments that would overflow them
   if (strlen ( argv[1] ) >= sizeof(filename)  ||  strlen ( argv[3] ) >= sizeof(depname)) {
      printf ( "\nERROR... datafile and depname must each be shorter than %d characters",
               (int) sizeof(filename) ) ;
      return EXIT_FAILURE ;
      }

   strcpy ( filename , argv[1] ) ;
   n_indep_vars = atoi ( argv[2] ) ;
   strcpy ( depname , argv[3] ) ;
   nreps = atoi ( argv[4] ) ;
#else
   strcpy ( filename , "..\\SYNTH.TXT" ) ;
   n_indep_vars = 7 ;
   strcpy ( depname , "SUM1234" ) ;
   nreps = 100 ;
#endif

   // With no candidates the sorts below would run on empty arrays
   if (n_indep_vars < 1) {
      printf ( "\nERROR... n_indep must be at least 1" ) ;
      return EXIT_FAILURE ;
      }

   // The unpermuted replication (irep=0) initializes index, crits and the counts
   if (nreps < 1) {
      printf ( "\nERROR... nreps must be at least 1" ) ;
      return EXIT_FAILURE ;
      }

   _strupr ( depname ) ;

/*
   These are used by MEM.CPP for runtime memory validation
*/

   _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ;
   fp = fopen ( mem_file_name , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MEM.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }
   fclose ( fp ) ;
   mem_keep_log = 0 ;  // Change this to 1 to keep a memory use log (slows execution!)
   mem_max_used = 0 ;

/*
   Open the text file to which results will be written
*/

   fp = fopen ( "MI_ONLY.LOG" , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MI_ONLY.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }

/*
   Read the file and locate the index of the dependent variable
*/

   if (readfile ( filename , &nvars , &names , &ncases , &data ))
      return EXIT_FAILURE ;

   for (idep=0 ; idep<nvars ; idep++) {
      if (! strcmp ( depname , names[idep] ))
         break ;
      }

   if (idep == nvars) {
      printf ( "\nERROR... Dependent variable %s is not in file", depname ) ;
      return EXIT_FAILURE ;
      }

   // This also guarantees n_indep_vars <= idep < nvars
   if (idep < n_indep_vars) {
      printf ( "\nERROR... Dependent variable %s must be beyond independent vars",
               depname ) ;
      return EXIT_FAILURE ;
      }

/*
   Check each variable for ties.  This is not needed for the algorithm,
   but it is good to warn the user, because more than a very few tied values
   in any variable seriously degrades performance of the adaptive partitioning algorithm.
*/

   MEMTEXT ( "MI_ONLY: Work" ) ;
   work = (double *) MALLOC ( ncases * sizeof(double) ) ;
   assert ( work != NULL ) ;

   ties = 0 ;
   for (ivar=0 ; ivar<nvars ; ivar++) {
      // Independent variables are indices 0 through n_indep_vars-1
      if (ivar >= n_indep_vars  &&  ivar != idep)
         continue ; // Check only the variables selected by the user
      for (i=0 ; i<ncases ; i++)
         work[i] = data[i*nvars+ivar] ;
      qsortd ( 0 , ncases-1 , work ) ;  // Sorting makes tied values adjacent
      nties = 0 ;
      for (i=1 ; i<ncases ; i++) {
         if (work[i] == work[i-1])
            ++nties ;
         }
      if ((double) nties / (double) ncases > 0.05) {
         ++ties ;
         fprintf ( fp , "\nWARNING... %s has %.2lf percent ties!",
                   names[ivar], 100.0 * nties / (double) ncases ) ;
         }
      } // For all variables
   if (ties) {
      fprintf ( fp , "\nThe presence of ties will seriously degrade" ) ;
      fprintf ( fp , "\nperformance of the adaptive partitioning algorithm\n\n" ) ;
      }

/*
   Allocate scratch memory and create the MutualInformation object using the
   dependent variable

   crits - Mutual information criterion
   index - Indices that sort the criterion
   save_info - Ditto, this is univariate information, to be sorted
   mi_adapt - The MutualInformation object, constructed with the 'dependent' variable
*/

   MEMTEXT ( "MI_ONLY work allocs plus MutualInformation" ) ;
   crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( crits != NULL ) ;
   index = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( index != NULL ) ;
   mcpt_max_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_max_counts != NULL ) ;
   mcpt_same_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_same_counts != NULL ) ;
   mcpt_solo_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_solo_counts != NULL ) ;
   save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( save_info != NULL ) ;

   for (irep=0 ; irep<nreps ; irep++) {

      for (i=0 ; i<ncases ; i++)            // Get the 'dependent' variable
         work[i] = data[i*nvars+idep] ;

//    Shuffle dependent variable if in permutation run (irep>0)

      if (irep) {                   // If doing permuted runs, shuffle
         i = ncases ;              // Number remaining to be shuffled
         while (i > 1) {            // While at least 2 left to shuffle
            j = (int) (unifrand () * i) ;
            if (j >= i)             // Guards against unifrand() returning exactly 1.0
               j = i - 1 ;
            dtemp = work[--i] ;
            work[i] = work[j] ;
            work[j] = dtemp ;
            }
         }

      // Here we use a tiny split threshold (instead of the usual 6.0) so that it picks up
      // small amounts of mutual information (perhaps including noise).
      // If we used 6.0, nearly all permutations of any reasonably sized dataset
      // would have a computed mutual information of zero.  It's safe picking up
      // some noise because the permutation test will account for this.

      mi_adapt = new MutualInformationAdaptive ( ncases , work , 1 , 0.1 ) ; // Deliberately tiny for low information
      assert ( mi_adapt != NULL ) ;

/*
   Compute and save the mutual information for the dependent variable
   with each individual independent variable candidate.
*/

      for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
         for (i=0 ; i<ncases ; i++)
            work[i] = data[i*nvars+icand] ;

         criterion = mi_adapt->mut_inf ( work , 1 ) ;

         save_info[icand] = criterion ; // We will sort this when all candidates are done

         if (irep == 0) {               // If doing original (unpermuted), save criterion
            index[icand] = icand ;      // Will need original indices when criteria are sorted
            crits[icand] = criterion ;
            mcpt_max_counts[icand] = mcpt_same_counts[icand] = mcpt_solo_counts[icand] = 1 ;  // This is >= itself so count it now
            }
         else {
            if (criterion >= crits[icand])
               ++mcpt_solo_counts[icand] ;
            }
         } // Initial list of all candidates

      delete mi_adapt ;
      mi_adapt = NULL ;

      if (irep == 0)  // Find the indices that sort the candidates per criterion
         qsortdsi ( 0 , n_indep_vars-1 , save_info , index ) ;

      else {
         // Compare the sorted permuted criteria, rank by rank, with the original
         // criterion of the candidate that held that rank in the unpermuted run
         qsortd ( 0 , n_indep_vars-1 , save_info ) ;
         for (icand=0 ; icand<n_indep_vars ; icand++) {
            if (save_info[icand] >= crits[index[icand]])
               ++mcpt_same_counts[index[icand]] ;
            if (save_info[n_indep_vars-1] >= crits[index[icand]]) // Valid only for largest
               ++mcpt_max_counts[index[icand]] ;
            }
         }

      }  // For all reps

   fprintf ( fp , "\nAdaptive partitioning mutual information of %s", depname);

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\nPredictors, in order of decreasing mutual information" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n                       Variable   Information   Solo pval   Min pval   Max pval" ) ;

   for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates
      k = index[n_indep_vars-1-icand] ;           // Index of sorted candidate
      fprintf ( fp , "\n%31s %11.5lf %12.4lf %10.4lf %10.4lf", names[k], crits[k],
                (double) mcpt_solo_counts[k] / nreps,
                (double) mcpt_same_counts[k] / nreps,
                (double) mcpt_max_counts[k] / nreps ) ;
      }

   MEMTEXT ( "MI_ONLY: Finish" ) ;
   fclose ( fp ) ;
   FREE ( work ) ;
   FREE ( crits ) ;
   FREE ( index ) ;
   FREE ( mcpt_max_counts ) ;
   FREE ( mcpt_same_counts ) ;
   FREE ( mcpt_solo_counts ) ;
   FREE ( save_info ) ;
   free_data ( nvars , names , data ) ;

   MEMCLOSE () ;
   printf ( "\n\nPress any key..." ) ;
   _getch () ;
   return EXIT_SUCCESS ;
}
Exemplo n.º 6
0
/*
   TRANSFER - Transfer entropy of each candidate with a dependent variable

   Command line:  TRANSFER  datafile  n_indep  depname  nbins  nreps

   The dependent variable and each of the first n_indep variables in the data
   file are partitioned into bins, and trans_ent() is evaluated for each
   candidate paired with the dependent variable.  When nreps > 1 each
   candidate is shuffled on every replication after the first, and the number
   of times a permuted criterion equals or exceeds the original is counted.
   Results are written to TRANSFER.LOG.

   Returns EXIT_SUCCESS, or EXIT_FAILURE (exit status 1 for a wrong argument
   count) if the command line or the input is unusable.
*/

int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )

{
   int i, j, k, nvars, ncases, irep, nreps, nbins, nbins_dep, nbins_indep, *count ;
   int n_indep_vars, idep, icand, *index, *mcpt_max_counts, *mcpt_same_counts, *mcpt_solo_counts ;
   short int *bins_dep, *bins_indep ;
   double *data, *work, dtemp, *save_info, criterion, *crits ;
   double *ab, *bc, *b ;
   char filename[256], **names, depname[256] ;
   FILE *fp ;

/*
   Process command line parameters
*/

#if 1
   if (argc != 6) {
      printf ( "\nUsage: TRANSFER  datafile  n_indep  depname  nbins  nreps" ) ;
      printf ( "\n  datafile - name of the text file containing the data" ) ;
      printf ( "\n             The first line is variable names" ) ;
      printf ( "\n             Subsequent lines are the data." ) ;
      printf ( "\n             Delimiters can be space, comma, or tab" ) ;
      printf ( "\n  n_indep - Number of independent vars, starting with the first" ) ;
      printf ( "\n  depname - Name of the 'dependent' variable" ) ;
      printf ( "\n            It must be AFTER the first n_indep variables" ) ;
      printf ( "\n  nbins - Number of bins for all variables" ) ;
      printf ( "\n  nreps - Number of Monte-Carlo permutations, including unpermuted" ) ;
      exit ( 1 ) ;
      }

   // The destination buffers are fixed size, so refuse arguments that would overflow them
   if (strlen ( argv[1] ) >= sizeof(filename)  ||  strlen ( argv[3] ) >= sizeof(depname)) {
      printf ( "\nERROR... datafile and depname must each be shorter than %d characters",
               (int) sizeof(filename) ) ;
      return EXIT_FAILURE ;
      }

   strcpy ( filename , argv[1] ) ;
   n_indep_vars = atoi ( argv[2] ) ;
   strcpy ( depname , argv[3] ) ;
   nbins = atoi ( argv[4] ) ;
   nreps = atoi ( argv[5] ) ;
#else
   strcpy ( filename , "..\\SYNTH.TXT" ) ;
   n_indep_vars = 7 ;
   strcpy ( depname , "SUM1234" ) ;
   nbins = 2 ;
   nreps = 1 ;
#endif

   // With no candidates the sorts below would run on empty arrays
   if (n_indep_vars < 1) {
      printf ( "\nERROR... n_indep must be at least 1" ) ;
      return EXIT_FAILURE ;
      }

   // nbins sizes the count, ab, bc and b work arrays; a single bin carries no information
   if (nbins < 2) {
      printf ( "\nERROR... nbins must be at least 2" ) ;
      return EXIT_FAILURE ;
      }

   // The unpermuted replication (irep=0) initializes index, crits and the counts
   if (nreps < 1) {
      printf ( "\nERROR... nreps must be at least 1" ) ;
      return EXIT_FAILURE ;
      }

   _strupr ( depname ) ;

/*
   These are used by MEM.CPP for runtime memory validation
*/

   _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ;
   fp = fopen ( mem_file_name , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MEM.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }
   fclose ( fp ) ;
   mem_keep_log = 1 ;  // 1 keeps a memory use log (slows execution!); change this to 0 to disable it
   mem_max_used = 0 ;

/*
   Open the text file to which results will be written
*/

   fp = fopen ( "TRANSFER.LOG" , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open TRANSFER.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }

/*
   Read the file and locate the index of the dependent variable
*/

   if (readfile ( filename , &nvars , &names , &ncases , &data ))
      return EXIT_FAILURE ;

   for (idep=0 ; idep<nvars ; idep++) {
      if (! strcmp ( depname , names[idep] ))
         break ;
      }

   if (idep == nvars) {
      printf ( "\nERROR... Dependent variable %s is not in file", depname ) ;
      return EXIT_FAILURE ;
      }

   // This also guarantees n_indep_vars <= idep < nvars
   if (idep < n_indep_vars) {
      printf ( "\nERROR... Dependent variable %s must be beyond independent vars",
               depname ) ;
      return EXIT_FAILURE ;
      }

/*
   Allocate scratch memory

   crits - Transfer Entropy criterion
   index - Indices that sort the criterion
   save_info - Ditto, this is univariate criteria, to be sorted
   bins_indep, bins_dep - Bin membership of each case, filled in by partition()
   count, ab, bc, b - Work areas handed to trans_ent()
*/

   MEMTEXT ( "TRANSFER work allocs" ) ;
   work = (double *) MALLOC ( ncases * sizeof(double) ) ;
   assert ( work != NULL ) ;
   crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( crits != NULL ) ;
   index = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( index != NULL ) ;
   bins_indep = (short int *) MALLOC ( ncases * sizeof(short int) ) ;
   assert ( bins_indep != NULL ) ;
   bins_dep = (short int *) MALLOC ( ncases * sizeof(short int) ) ;
   assert ( bins_dep != NULL ) ;
   mcpt_max_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_max_counts != NULL ) ;
   mcpt_same_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_same_counts != NULL ) ;
   mcpt_solo_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( mcpt_solo_counts != NULL ) ;
   save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( save_info != NULL ) ;
   // Do the products in size_t so that the cube of nbins cannot overflow int
   count = (int *) MALLOC ( (size_t) nbins * nbins * nbins * sizeof(int) ) ;
   assert ( count != NULL ) ;
   ab = (double *) MALLOC ( (size_t) nbins * nbins * sizeof(double) ) ;
   assert ( ab != NULL ) ;
   bc = (double *) MALLOC ( (size_t) nbins * nbins * sizeof(double) ) ;
   assert ( bc != NULL ) ;
   b = (double *) MALLOC ( nbins * sizeof(double) ) ;
   assert ( b != NULL ) ;

/*
   Get the dependent variable and partition it
*/

   for (i=0 ; i<ncases ; i++)            // Get the 'dependent' variable
      work[i] = data[i*nvars+idep] ;

   nbins_dep = nbins ;   // Passed by address; presumably partition() may change it - TODO confirm
   partition ( ncases , work , &nbins_dep , NULL , bins_dep ) ;

/*
   Replication loop is here
*/

   for (irep=0 ; irep<nreps ; irep++) {

/*
   Compute and save the transfer entropy of the dependent variable
   with each individual independent variable candidate.
*/

      for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
         for (i=0 ; i<ncases ; i++)
            work[i] = data[i*nvars+icand] ;

         //    Shuffle independent variable if in permutation run (irep>0)

         if (irep) {                   // If doing permuted runs, shuffle
            i = ncases ;               // Number remaining to be shuffled
            while (i > 1) {            // While at least 2 left to shuffle
               j = (int) (unifrand () * i) ;
               if (j >= i)             // Guards against unifrand() returning exactly 1.0
                  j = i - 1 ;
               dtemp = work[--i] ;
               work[i] = work[j] ;
               work[j] = dtemp ;
               }
            }

         nbins_indep = nbins ;   // Reset for every candidate because it is passed by address
         partition ( ncases , work , &nbins_indep , NULL , bins_indep ) ;

         criterion = trans_ent ( ncases , nbins_indep , nbins_dep ,
                                 bins_indep , bins_dep ,
                                 0 , 1 , 1 , count , ab , bc , b ) ;

         save_info[icand] = criterion ; // We will sort this when all candidates are done

         if (irep == 0) {               // If doing original (unpermuted), save criterion
            index[icand] = icand ;      // Will need original indices when criteria are sorted
            crits[icand] = criterion ;
            mcpt_max_counts[icand] = mcpt_same_counts[icand] = mcpt_solo_counts[icand] = 1 ;  // This is >= itself so count it now
            }
         else {
            if (criterion >= crits[icand])
               ++mcpt_solo_counts[icand] ;
            }
         } // Initial list of all candidates

      if (irep == 0)  // Find the indices that sort the candidates per criterion
         qsortdsi ( 0 , n_indep_vars-1 , save_info , index ) ;

      else {
         // Compare the sorted permuted criteria, rank by rank, with the original
         // criterion of the candidate that held that rank in the unpermuted run
         qsortd ( 0 , n_indep_vars-1 , save_info ) ;
         for (icand=0 ; icand<n_indep_vars ; icand++) {
            if (save_info[icand] >= crits[index[icand]])
               ++mcpt_same_counts[index[icand]] ;
            if (save_info[n_indep_vars-1] >= crits[index[icand]]) // Valid only for largest
               ++mcpt_max_counts[index[icand]] ;
            }
         }

      }  // For all reps

   fprintf ( fp , "\nTransfer entropy of %s", depname);

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\nPredictors, in order of decreasing transfer entropy" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n                       Variable   Information   Solo pval   Min pval   Max pval" ) ;

   for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates
      k = index[n_indep_vars-1-icand] ;           // Index of sorted candidate
      fprintf ( fp , "\n%31s %11.5lf %12.4lf %10.4lf %10.4lf", names[k], crits[k],
                (double) mcpt_solo_counts[k] / nreps,
                (double) mcpt_same_counts[k] / nreps,
                (double) mcpt_max_counts[k] / nreps ) ;
      }

   MEMTEXT ( "TRANSFER: Finish" ) ;
   fclose ( fp ) ;
   FREE ( work ) ;
   FREE ( crits ) ;
   FREE ( index ) ;
   FREE ( bins_indep ) ;
   FREE ( bins_dep ) ;
   FREE ( mcpt_max_counts ) ;
   FREE ( mcpt_same_counts ) ;
   FREE ( mcpt_solo_counts ) ;
   FREE ( save_info ) ;
   FREE ( count ) ;
   FREE ( ab ) ;
   FREE ( bc ) ;
   FREE ( b ) ;
   free_data ( nvars , names , data ) ;

   MEMCLOSE () ;
   printf ( "\n\nPress any key..." ) ;
   _getch () ;
   return EXIT_SUCCESS ;
}
Exemplo n.º 7
0
int main (
   int argc ,    // Number of command line arguments (includes prog name)
   char *argv[]  // Arguments (prog name is argv[0])
   )

{
   int i, j, k, nvars, ncases, ndiv, maxkept, ivar, nties, ties ;
   int n_indep_vars, idep, icand, iother, ibest, *sortwork, nkept, *kept ;
   double *data, *work ;
   double *save_info, *univar_info, *pair_info, bestredun, redun, bestcrit ;
   double criterion, relevance, redundancy, *crits, *reduns ;
   char filename[256], **names, depname[256] ;
   char trial_name[256], *pair_found ;
   FILE *fp ;
   MutualInformationParzen *mi_parzen ;
   MutualInformationAdaptive *mi_adapt ;

/*
   Process command line parameters
*/

#if 1
   if (argc != 6) {
      printf ( "\nUsage: MI_CONT  datafile  n_indep  depname  ndiv  maxkept" ) ;
      printf ( "\n  datafile - name of the text file containing the data" ) ;
      printf ( "\n             The first line is variable names" ) ;
      printf ( "\n             Subsequent lines are the data." ) ;
      printf ( "\n             Delimiters can be space, comma, or tab" ) ;
      printf ( "\n  n_indep - Number of independent vars, starting with the first" ) ;
      printf ( "\n  depname - Name of the 'dependent' variable" ) ;
      printf ( "\n            It must be AFTER the first n_indep variables" ) ;
      printf ( "\n  ndiv - Normally zero, to employ adaptive partitioning" ) ;
      printf ( "\n         Specify 5 (for very few cases) to 15 (for an" ) ;
      printf ( "\n         enormous number of cases) to use Parzen windows" ) ;
      printf ( "\n  maxkept - Stepwise will allow at most this many predictors" ) ;
      exit ( 1 ) ;
      }

   strcpy ( filename , argv[1] ) ;
   n_indep_vars = atoi ( argv[2] ) ;
   strcpy ( depname , argv[3] ) ;
   ndiv = atoi ( argv[4] ) ;
   maxkept = atoi ( argv[5] ) ;
#else
   strcpy ( filename , "..\\VARS.TXT" ) ;
   n_indep_vars = 8 ;
   strcpy ( depname , "DAY_RETURN" ) ;
   ndiv = 0 ;
   maxkept = 5 ;
#endif

   _strupr ( depname ) ;

/*
   These are used by MEM.CPP for runtime memory validation
*/

   _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ;
   fp = fopen ( mem_file_name , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MEM.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }
   fclose ( fp ) ;
   mem_keep_log = 1 ;
   mem_max_used = 0 ;

/*
   Open the text file to which results will be written
*/

   fp = fopen ( "MI_CONT.LOG" , "wt" ) ;
   if (fp == NULL) { // Should never happen
      printf ( "\nCannot open MI_CONT.LOG file for writing!" ) ;
      return EXIT_FAILURE ;
      }

/*
   Read the file and locate the index of the 'dependent' variable
*/

   if (readfile ( filename , &nvars , &names , &ncases , &data ))
      return EXIT_FAILURE ;

   for (idep=0 ; idep<nvars ; idep++) {
      if (! strcmp ( depname , names[idep] ))
         break ;
      }

   if (idep == nvars) {
      printf ( "\nERROR... Dependent variable %s is not in file", depname ) ;
      return EXIT_FAILURE ;
      }

   if (idep < n_indep_vars) {
      printf ( "\nERROR... Dependent variable %s must be beyond independent vars",
               depname ) ;
      return EXIT_FAILURE ;
      }

/*
   If adaptive partitioning is specified, check each variable for ties.
   This is not needed for the algorithm, but it is good to warn the
   user, because more than a very few tied values in any variable seriously
   degrades performance of the adaptive partitioning algorithm.
*/

   MEMTEXT ( "MI_CONT: Work" ) ;
   work = (double *) MALLOC ( ncases * sizeof(double) ) ;
   assert ( work != NULL ) ;

   if (ndiv == 0) {  // If adaptive partitioning, check for ties
      ties = 0 ;
      assert ( work != NULL ) ;
      for (ivar=0 ; ivar<nvars ; ivar++) {
         if (ivar > n_indep_vars  &&  ivar != idep)
            continue ; // Check only the variables selected by the user
         for (i=0 ; i<ncases ; i++)
            work[i] = data[i*nvars+ivar] ;
         qsortd ( 0 , ncases-1 , work ) ;
         nties = 0 ;
         for (i=1 ; i<ncases ; i++) {
            if (work[i] == work[i-1])
               ++nties ;
            }
         if ((double) nties / (double) ncases > 0.05) {
            ++ties ;
            fprintf ( fp , "\nWARNING... %s has %.2lf percent ties!",
                      names[ivar], 100.0 * nties / (double) ncases ) ;
            }
         } // For all variables
      if (ties) {
         fprintf ( fp , "\nThe presence of ties will seriously degrade" ) ;
         fprintf ( fp , "\nperformance of the adaptive partitioning algorithm\n\n" ) ;
         }
      } // If adaptive partitioning, so testing for ties in the data

/*
   Allocate scratch memory and create the MutualInformation object using the
   dependent variable

   kept - Array of indices of variables kept so far
   crits - Ditto, criterion
   reduns - Ditto, redundancy
   sortwork - Temporary use for printing variable's information sorted
   save_info - Ditto, this is univariate information, to be sorted
   univar_info - Also univariate information, but not sorted, for use in stepwise
   pair_found - Flag: is there valid info in the corresponding element of the next array
   pair_info - Preserve pairwise information of indeps to avoid expensive recalculation
   mi_parzen - The MutualInformation object, constructed with the 'dependent' variable
   mi_adapt - Ditto, but used if adaptive partitioning
*/

   MEMTEXT ( "MI_CONT 6 allocs plus MutualInformation" ) ;
   kept = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( kept != NULL ) ;
   crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( crits != NULL ) ;
   reduns = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( reduns != NULL ) ;
   sortwork = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ;
   assert ( sortwork != NULL ) ;
   save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( save_info != NULL ) ;
   univar_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ;
   assert ( univar_info != NULL ) ;
   pair_found = (char *) MALLOC ( (n_indep_vars * (n_indep_vars+1) / 2) * sizeof(char) ) ;
   assert ( pair_found != NULL ) ;
   pair_info = (double *) MALLOC ( (n_indep_vars * (n_indep_vars+1) / 2) * sizeof(double) ) ;
   assert ( pair_info != NULL ) ;

   for (i=0 ; i<ncases ; i++)            // Get the 'dependent' variable
      work[i] = data[i*nvars+idep] ;

   if (ndiv > 0) {
      mi_parzen = new MutualInformationParzen ( ncases , work , ndiv ) ;
      mi_adapt = NULL ;
      assert ( mi_parzen != NULL ) ;
      }
   else {
      mi_adapt = new MutualInformationAdaptive ( ncases , work , 0 , 6.0 ) ;
      mi_parzen = NULL ;
      assert ( mi_adapt != NULL ) ;
      }

   memset ( pair_found , 0 , (n_indep_vars * (n_indep_vars+1) / 2) * sizeof(char) ) ;

   if (ndiv > 0)
      fprintf ( fp , "\nParzen mutual information of %s (ndiv=%d)", depname, ndiv);
   else
      fprintf ( fp , "\nAdaptive partitioning mutual information of %s", depname);

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n---------------------------------------------------------------" ) ;
   fprintf ( fp , "\n" ) ;

/*
   Compute and save the mutual information for the dependent variable with
   each individual independent variable candidate.  Print the results,
   sort them, and print them again, this time sorted.
*/

   fprintf ( fp , "\nInitial candidates, in order of appearance in data file" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n                       Variable   Information" ) ;

   for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
      for (i=0 ; i<ncases ; i++)
         work[i] = data[i*nvars+icand] ;

      if (ndiv > 0)
         criterion = mi_parzen->mut_inf ( work ) ;
      else
         criterion = mi_adapt->mut_inf ( work , 0 ) ;

      printf ( "\n%s = %.5lf", names[icand], criterion ) ;
      fprintf ( fp , "\n%31s   %.5lf", names[icand], criterion ) ;

      sortwork[icand] = icand ;
      save_info[icand] = univar_info[icand] = criterion ;
      } // Initial list of all candidates

   if (mi_parzen != NULL) {
      delete mi_parzen ;
      mi_parzen = NULL ;
      }
   if (mi_adapt != NULL) {
      delete mi_adapt ;
      mi_adapt = NULL ;
      }

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\nInitial candidates, in order of decreasing mutual information" ) ;
   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\n                       Variable   Information" ) ;

   qsortdsi ( 0 , n_indep_vars-1 , save_info , sortwork ) ;
   for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates
      k = sortwork[n_indep_vars-1-icand] ;        // Index of sorted candidate
      fprintf ( fp , "\n%31s   %.5lf", names[k], save_info[n_indep_vars-1-icand] ) ;
      }

/*
   Initialize the 'kept' set to be the best variable, and then begin the
   main outer loop that adds variables one at a time
*/

   kept[0] = sortwork[n_indep_vars-1] ;  // Index of best single candidate
   crits[0] = save_info[n_indep_vars-1] ;
   reduns[0] = 0.0 ;
   nkept = 1 ;

   if (maxkept > n_indep_vars)  // Guard against silly user
      maxkept = n_indep_vars ;

   while (nkept < maxkept) {

      fprintf ( fp , "\n" ) ;
      fprintf ( fp , "\nVariables so far                 Relevance  Redundancy  Criterion" ) ;
      for (i=0 ; i<nkept ; i++)
         fprintf ( fp , "\n%31s %10.5lf %10.5lf %10.5lf",
                   names[kept[i]], crits[i] + reduns[i], reduns[i], crits[i] ) ;
      fprintf ( fp , "\n" ) ;
      fprintf ( fp , "\nSearching for an additional candidate..." ) ;
      fprintf ( fp , "\n" ) ;
      fprintf ( fp , "\n                       Variable  Relevance  Redundancy  Criterion" ) ;

      bestcrit = -1.e60 ;
      for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
         for (i=0 ; i<nkept ; i++) {  // Is this candidate already kept?
            if (kept[i] == icand)
               break ;
            }
         if (i < nkept)  // If this candidate 'icand' is already kept
            continue ;   // Skip it

         strcpy ( trial_name , names[icand] ) ;   // Its name for printing
         for (i=0 ; i<ncases ; i++)               // Get its cases
            work[i] = data[i*nvars+icand] ;

         if (ndiv > 0) {
            mi_parzen = new MutualInformationParzen ( ncases , work , ndiv ) ;
            mi_adapt = NULL ;
            assert ( mi_parzen != NULL ) ;
            }
         else {
            mi_adapt = new MutualInformationAdaptive ( ncases , work , 0 , 6.0 ) ;
            mi_parzen = NULL ;
            assert ( mi_adapt != NULL ) ;
            }

         relevance = univar_info[icand] ; // We saved it during initial printing
         printf ( "\n%s relevance = %.5lf", trial_name, relevance ) ;

         // Compute the redundancy of this candidate
         // This is the mean of its redundancy with all kept variables
         redundancy = 0.0 ;
         for (iother=0 ; iother<nkept ; iother++) {  // Process entire kept set
            j = kept[iother] ;           // Index of a variable in the kept set
            if (icand > j)               // pair_found and pair_info are
               k = icand*(icand+1)/2+j ; // symmetric, so k is the index
            else                         // into them
               k = j*(j+1)/2+icand ;
            if (pair_found[k])           // If we already computed it
               redun = pair_info[k] ;    // Don't do it again
            else {                       // First time for this pair, so compute
               for (i=0 ; i<ncases ; i++)       // Get its cases
                  work[i] = data[i*nvars+j] ;   // Variable already in kept set
               if (ndiv > 0)
                  redun = mi_parzen->mut_inf ( work ) ;
               else
                  redun = mi_adapt->mut_inf ( work , 0 ) ;
               pair_found[k] = 1 ;       // Flag that this pair has been computed
               pair_info[k] = redun ;    // And save the MI for this pair
               } // Else must compute redundancy
            redundancy += redun ;
            printf ( "\n  %s <-> %s redundancy = %.5lf", names[icand], names[j], redun ) ;
            } // For all kept variables, computing mean redundancy

         if (mi_parzen != NULL) {
            delete mi_parzen ;
            mi_parzen = NULL ;
            }

         if (mi_adapt != NULL) {
            delete mi_adapt ;
            mi_adapt = NULL ;
            }

         redundancy /= nkept ;  // It is the mean across all kept
         printf ( "\nRedundancy = %.5lf", redundancy ) ;

         criterion = relevance - redundancy ;
         fprintf ( fp , "\n%31s %10.5lf %10.5lf %10.5lf",
                   trial_name, relevance, redundancy, criterion ) ;

         if (criterion > bestcrit) { // Did we just set a new record?
            bestcrit = criterion ;   // If so, update the record
            bestredun = redundancy ; // Needed for printing results later
            ibest = icand ;          // Keep track of the winning candidate
            }

         } // For all candidates

      // We now have the best candidate
      if (bestcrit <= 0.0)
         break ;
      kept[nkept] = ibest ;
      crits[nkept] = bestcrit ;
      reduns[nkept] = bestredun ;
      ++nkept ;
      } // While adding new variables

   fprintf ( fp , "\n" ) ;
   fprintf ( fp , "\nFinal set                        Relevance  Redundancy  Criterion" ) ;
   for (i=0 ; i<nkept ; i++)
      fprintf ( fp , "\n%31s %10.5lf %10.5lf %10.5lf",
                names[kept[i]], crits[i] + reduns[i], reduns[i], crits[i] ) ;


   MEMTEXT ( "MI_CONT: Finish" ) ;
   fclose ( fp ) ;
   FREE ( work ) ;
   FREE ( kept ) ;
   FREE ( crits ) ;
   FREE ( reduns ) ;
   FREE ( sortwork ) ;
   FREE ( save_info ) ;
   FREE ( univar_info ) ;
   FREE ( pair_found ) ;
   FREE ( pair_info ) ;
   if (mi_parzen != NULL)
      delete mi_parzen ;
   if (mi_adapt != NULL)
      delete mi_adapt ;
   free_data ( nvars , names , data ) ;

   MEMCLOSE () ;
   printf ( "\n\nPress any key..." ) ;
   _getch () ;
   return EXIT_SUCCESS ;
}