//******************************************************************* // WinMain - Neural main // // parameters: // hInstance - The instance of this instance of this // application. // hPrevInstance - The instance of the previous instance // of this application. This will be 0 // if this is the first instance. // lpszCmdLine - A long pointer to the command line that // started this application. // cmdShow - Indicates how the window is to be shown // initially. ie. SW_SHOWNORMAL, SW_HIDE, // SW_MIMIMIZE. // // returns: // wParam from last message. // //******************************************************************* int PASCAL WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpszCmdLine, int cmdShow) { /* Declarations of local variables */ int control_file_number = -1 ; // Stack pointer for control files FILE *control_files[MAX_CONTROL_FILES] ; // This is the stack char *control_line ; // User's commands here char *command, *rest ; // Pointers to its command and parameter parts int n_command, n_rest ; // Lengths of those parts int net_model = -1 ; // Network model (see NETMOD_? in CONST.H) int out_model = -1 ; // Output model (see OUTMOD_? in CONST.H) int n_inputs = -1 ; // Number of input neurons int n_outputs = -1 ; // Number of output neurons int n_hidden1 = -1 ; // Number of hidden layer one neurons int n_hidden2 = -1 ; // Ditto layer 2 (0 if just one hidden layer) TrainingSet *tset = NULL ; // Training set here Network *network = NULL ; // Network here struct LearnParams learn_params ; // General learning parameters struct AnnealParams anneal_params ; // Simulated annealing parameters struct GenInitParams geninit_params ; // Genetic initialization parameters struct KohParams koh_params ; // Kohonen parameters int classif_output = -1 ; // Current class (0=reject) for classif training char out_file[80] = "" ; // File for EXECUTE output float threshold ; // CLASSIFY confusion reject cutoff char resp_file[80]=""; // file for initializing output neuron's name char train_file[80]=""; /* Miscellaneous variables */ int i, n, m ; float p ; char *msg ; FILE *fp ; unsigned long me,mc; char *fname; char *control; #if VERSION_16_BIT if (sizeof(int) > 2) { printf ( "\nRecompile with VERSION_16_BIT set to 0 in CONST.H" ) ; exit ( 1 ) ; } #else if (sizeof(int) < 4) { printf ( "\nRecompile with VERSION_16_BIT set to 1 in CONST.H" ) ; exit ( 1 ) ; } #endif printf ( "\nNEURAL SYSTEM - Program to train and test neural networks" ) ; if (argc>1) { strcpy(fname,argv[1]); } /* Process command line parameters */ mem_name[0] = 0 ; // Default is no memory allocation file /* if (strlen ( mem_name )) { strcat ( mem_name , ":mem.log" ) ; fp = fopen ( mem_name , "wt" ) ; if (fp == NULL) { printf ( "\nCannot open debugging file %s", mem_name ) ; exit ( 1 ) ; } fclose ( fp ) ; mem_log = 1 ; } else mem_log = 0 ; */ mem_log = 0 ; mem_used = 0 ; /* Initialize defaults */ learn_params.init = -1 ; learn_params.quit_err = 0.0 ; learn_params.retries = 32767 ; anneal_params.temps0 = 3 ; anneal_params.temps = 4 ; anneal_params.iters0 = 50 ; anneal_params.iters = 20 ; anneal_params.setback0 = 50 ; anneal_params.setback = 20 ; anneal_params.start0 = 3.0 ; anneal_params.start = 4.0 ; anneal_params.stop0 = 1.0 ; anneal_params.stop = 0.02 ; geninit_params.pool = 50 ; geninit_params.gens = 3 ; geninit_params.climb = 0 ; geninit_params.overinit = 1.5 ; geninit_params.pcross = 0.8 ; geninit_params.pmutate = 0.0001 ; koh_params.normalization = 0 ; // 0=multiplicative, 1=Z koh_params.learn_method = 1 ; // 0=additive, 1=subtractive koh_params.rate = 0.4 ; // learning rate koh_params.reduction = 0.99 ; // learning rate reduction learn_params.ap = &anneal_params ; learn_params.gp = &geninit_params ; learn_params.kp = &koh_params ; act_func_init () ; // Initialize interpolation table for activation function MEMTEXT ( "NEURAL: control_line, msg" ) ; if (((control_line = (char *) MALLOC ( CONTROL_LINE_LENGTH+1 )) == NULL) || ((msg = (char *) MALLOC ( CONTROL_LINE_LENGTH+1 )) == NULL)) { printf ( "\nInsufficient memory" ) ; exit ( 1 ) ; } /* Main loop processes all commands */ for (;;) { if (argv[1]) { strcpy(control_line,"CONTROL:"); strcat(control_line,fname); //printf("%s\n",control_line); argv[1]=NULL; } else get_control_line ( control_line , &control_file_number, control_files ) ; split_control_line ( control_line , &command , &n_command , &rest , &n_rest ) ; if (! n_command) { if (n_rest) { sprintf ( msg , "No colon after command: %s", rest ) ; error_message ( msg ) ; } continue ; } sprintf ( msg , "%s : %s", command, rest ) ; normal_message ( msg ) ; /* Act on the command */ if (! strcmp ( command , "QUIT" )) break ; if (! strcmp ( command , "CONTROL" )) { stack_control_file (rest, &control_file_number, control_files) ; continue ; } if (! strcmp ( command , "NETWORK MODEL" )) { // Multi layer network if (! strcmp ( rest , "LAYER" )) n = NETMOD_LAYER ; // Kohonen network else if (! strcmp ( rest , "KOHONEN" )) n = NETMOD_KOH ; // Hopfield network else if (! strcmp ( rest , "HOPFIELD" )) n = NETMOD_HOP ; // Bidirectionnal associative memory network else if (! strcmp ( rest , "BAM" )) n = NETMOD_BAM ; else { sprintf ( msg , "Illegal NETWORK MODEL: %s", rest ) ; error_message ( msg ) ; continue ; } if (net_model == n) continue ; if (ok_to_clear_weights( &network )) { net_model = n ; learn_params.init = -1 ; } else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "OUTPUT MODEL" )) { if (! strcmp ( rest , "CLASSIFY" )) n = OUTMOD_CLASSIFY ; else if (! strcmp ( rest , "AUTO" )) n = OUTMOD_AUTO ; else if (! strcmp ( rest , "GENERAL" )) n = OUTMOD_GENERAL ; else { sprintf ( msg , "Illegal OUTPUT MODEL: %s", rest ) ; error_message ( msg ) ; continue ; } if (out_model == n) continue ; if ((ok_to_clear_tset( &tset )) && (ok_to_clear_weights( &network))) out_model = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N INPUTS" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n <= 0) || (n > MAX_INPUTS)) { sprintf ( msg , "Illegal N INPUTS: %s", rest ) ; error_message ( msg ) ; continue ; } if (n_inputs == n) continue ; if ((ok_to_clear_tset( &tset)) && (ok_to_clear_weights(&network))) n_inputs = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N OUTPUTS" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n <= 0) || (n > MAX_OUTPUTS)) { sprintf ( msg , "Illegal N OUTPUTS: %s", rest ) ; error_message ( msg ) ; continue ; } if (n_outputs == n) continue ; if ((ok_to_clear_tset( &tset)) && (ok_to_clear_weights(&network))) n_outputs = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N HIDDEN1" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n < 0) || (n > MAX_HIDDEN)) { sprintf ( msg , "Illegal N HIDDEN1: %s", rest ) ; error_message ( msg ) ; continue ; } if (n_hidden1 == n) continue ; if (ok_to_clear_weights( &network )) n_hidden1 = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N HIDDEN2" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n < 0) || (n > MAX_HIDDEN)) { sprintf ( msg , "Illegal N HIDDEN2: %s", rest ) ; error_message ( msg ) ; continue ; } if (n && ! n_hidden1) { error_message ( "N HIDDEN2 must be 0 if N HIDDEN1 IS 0." ) ; continue ; } if (n_hidden2 == n) continue ; if (ok_to_clear_weights( &network )) n_hidden2 = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "TRAIN" )) { if ((out_model == OUTMOD_AUTO) && (n_outputs != n_inputs)) { warning_message ( "Setting N OUTPUTS = N INPUTS" ) ; n_outputs = n_inputs ; } if (out_model <= 0) error_message ( "TRAIN used before OUTPUT MODEL set." ) ; else if (n_inputs <= 0) error_message ( "TRAIN used before N INPUTS set." ) ; else if (n_outputs <= 0) error_message ( "TRAIN used before N OUTPUTS set." ) ; else if ((net_model == NETMOD_HOP) && (n_inputs != n_outputs)) error_message("HOPFIELD netowork requires INPUTS = OUTPUTS."); else if ((net_model == NETMOD_BAM) && (out_model != OUTMOD_GENERAL)) error_message("BAM network requires AUTO output mode."); else if ((net_model == NETMOD_HOP) && (out_model != OUTMOD_AUTO)) error_message("HOFIELD network requires AUTO output mode."); else if ((net_model != NETMOD_KOH) && (out_model == OUTMOD_CLASSIFY) && (classif_output < 0)) error_message( "CLASSIFY output mode but CLASSIFY OUTPUT not set."); else if ((net_model == NETMOD_KOH) && (out_model != OUTMOD_CLASSIFY)) error_message( "KOHONEN network requires CLASSIFY output mode."); else { if (tset == NULL) { MEMTEXT ( "NEURAL: new tset" ) ; tset = new TrainingSet ( out_model , n_inputs , n_outputs ) ; } tset->train ( rest , classif_output ) ; strcpy(train_file,rest); } continue ; } if (check_anneal ( command , rest , &anneal_params )) continue ; if (check_genetic ( command , rest , &geninit_params )) continue ; if (check_kohonen ( command , rest , &koh_params , &network )) continue ; if (check_learn_params ( command , rest , &learn_params , net_model )) continue ; if (! strcmp ( command , "LEARN" )) { if ((tset == NULL) || (tset->ntrain == 0)) { error_message ( "Cannot LEARN; No training set exists." ) ; continue ; } if ((net_model == NETMOD_KOH) && (out_model != OUTMOD_CLASSIFY)) { error_message( "KOHONEN network requires CLASSIFY output mode."); continue ; } if (learn_params.init < 0) { error_message( "Initialization method not set."); continue ; } if (network == NULL) { if (net_model == NETMOD_LAYER) { if (n_hidden1 < 0) { error_message ( "LEARN used before N HIDDEN1 set." ) ; continue ; } else if (n_hidden2 < 0) { error_message ( "LEARN used before N HIDDEN2 set." ) ; continue ; } else { MEMTEXT ( "NEURAL: new LayerNet" ) ; network = new LayerNet ( out_model , n_inputs , n_hidden1 , n_hidden2 , n_outputs , 1 , 1 ) ; } } else if (net_model == NETMOD_KOH) { MEMTEXT ( "NEURAL: new KohNet" ) ; network = new KohNet ( n_inputs , n_outputs , &koh_params , 1 , 1 ) ; } else if (net_model == NETMOD_HOP) { MEMTEXT ( "NEURAL: new HopNet" ); network = new HopNet (n_inputs,n_outputs, 1,1); } else if (net_model == NETMOD_BAM) { MEMTEXT ("NEURAL: new BamNet"); network = new LayerNet ( out_model , n_inputs , n_hidden1 , n_hidden2 , n_outputs , 1 , 1 ) ; } } if ((network == NULL) || (! network->ok)) { // Malloc failure? memory_message ( "to create network." ) ; if (network != NULL) { delete network ; network = NULL ; } continue ; } normal_message("Learning...\n"); network->learn ( tset , &learn_params ) ; normal_message("End of Learning\n"); if (network->neterr > 0.999999) { // Indicates massive failure MEMTEXT ( "NEURAL: learn failure delete network" ) ; delete network ; network = NULL ; } else { sprintf ( msg , "Final error = %.4lf%% of max possible", 100.0 * network->neterr ) ; normal_message ( msg ) ; } continue ; } if (! strcmp ( command , "SAVE WEIGHTS" )) { if (network == NULL) error_message ( "There are no learned weights to save." ) ; else wt_save ( network , net_model , 0 , rest ) ; continue ; } if (! strcmp ( command , "RESTORE WEIGHTS" )) { if (network != NULL) { MEMTEXT ( "NEURAL: delete network for restore" ) ; delete network ; network = NULL ; } network = wt_restore ( rest , &net_model ) ; if (network == NULL) continue ; if (tset != NULL) { if ((tset->nin != network->nin) || (tset->nout != network->nout) || (tset->outmod != network->outmod)) { error_message ( "Network conflicts with existing training set."); continue ; } } out_model = network->outmod ; n_inputs = network->nin ; n_outputs = network->nout ; if (net_model == NETMOD_LAYER) { n_hidden1 = ((LayerNet*) network)->nhid1 ; n_hidden2 = ((LayerNet*) network)->nhid2 ; } if (net_model == NETMOD_KOH) koh_params.normalization = ((KohNet *) network)->normalization ; learn_params.init = -1 ; continue ; } if (! strcmp ( command , "CLEAR TRAINING" )) { if (tset != NULL) { MEMTEXT ( "NEURAL: delete tset" ) ; delete tset ; tset = NULL ; } continue ; } if (! strcmp ( command , "CLEAR WEIGHTS" )) { if (network != NULL) { MEMTEXT ( "NEURAL: delete network" ) ; delete network ; network = NULL ; } continue ; } if (! strcmp ( command , "CLASSIFY OUTPUT" )) { if (net_model == NETMOD_KOH) { error_message ( "Cannot specify output for KOHONEN model." ) ; continue ; } if (n_outputs < 0) { error_message ( "CLASSIFY OUTPUT used before N OUTPUTS set." ) ; continue ; } if (out_model != OUTMOD_CLASSIFY) { error_message ( "CLASSIFY OUTPUT only valid when OUTPUT MODEL:CLASSIFY" ) ; continue ; } m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n < 0)) { sprintf ( msg , "Illegal CLASSIFY OUTPUT: %s", rest ) ; error_message ( msg ) ; } else if (n > n_outputs) { sprintf ( msg , "CLASSIFY OUTPUT (%d) exceeds N OUTPUTS (%d)", n, n_outputs ) ; error_message ( msg ) ; } else classif_output = n ; continue ; } if (! strcmp ( command , "OUTPUT FILE" )) { strcpy ( out_file , rest ) ; continue ; } if (! strcmp ( command , "EXECUTE" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else { network->execute_from_file ( rest , out_file) ; continue ; } } if (! strcmp ( command , "TEST NETWORK" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else { network->test_from_file ( rest ,out_file,net_model) ; continue ; } } if (! strcmp ( command , "CLASSIFY" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else if (out_model != OUTMOD_CLASSIFY) error_message ( "CLASSIFY valid only in CLASSIFY output mode" ) ; else network->classify_from_file ( rest , threshold ) ; continue ; } if (! strcmp ( command , "RESET CONFUSION" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else network->reset_confusion () ; continue ; } if (! strcmp ( command , "CONFUSION THRESHOLD" )) { p = atof ( rest ) ; if ((p < 0.0) || (p > 100.0)) { sprintf ( msg , "Illegal CONFUSION THRESHOLD: %s", rest ) ; error_message ( msg ) ; } else threshold = p / 100.0 ; continue ; } if (! strcmp ( command , "SHOW CONFUSION" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else if (out_model != OUTMOD_CLASSIFY) error_message ( "CONFUSION valid only in CLASSIFY output mode" ) ; else network->show_confusion () ; continue ; } if (! strcmp ( command , "SAVE CONFUSION" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else if (out_model != OUTMOD_CLASSIFY) error_message ( "CONFUSION valid only in CLASSIFY output mode" ) ; else network->save_confusion ( rest ) ; continue ; } sprintf ( msg , "Unknown command: %s", command ) ; error_message ( msg ) ; } // Endless command loop MEMTEXT ( "NEURAL: control_line, msg" ) ; FREE ( control_line ) ; FREE ( msg ) ; MEMCLOSE () ; exit ( 0 ) ; }
int main ( int argc , // Number of command line arguments (includes prog name) char *argv[] // Arguments (prog name is argv[0]) ) { int i, j, k, nvars, ncases, irep, nreps, ivar, nties, ties ; int n_indep_vars, idep, icand, *index, *mcpt_max_counts, *mcpt_same_counts, *mcpt_solo_counts ; double *data, *work, dtemp, *save_info, criterion, *crits ; char filename[256], **names, depname[256] ; FILE *fp ; MutualInformationAdaptive *mi_adapt ; /* Process command line parameters */ #if 1 if (argc != 5) { printf ( "\nUsage: MI_ONLY datafile n_indep depname nreps" ) ; printf ( "\n datafile - name of the text file containing the data" ) ; printf ( "\n The first line is variable names" ) ; printf ( "\n Subsequent lines are the data." ) ; printf ( "\n Delimiters can be space, comma, or tab" ) ; printf ( "\n n_indep - Number of independent vars, starting with the first" ) ; printf ( "\n depname - Name of the 'dependent' variable" ) ; printf ( "\n It must be AFTER the first n_indep variables" ) ; printf ( "\n nreps - Number of Monte-Carlo permutations, including unpermuted" ) ; exit ( 1 ) ; } strcpy ( filename , argv[1] ) ; n_indep_vars = atoi ( argv[2] ) ; strcpy ( depname , argv[3] ) ; nreps = atoi ( argv[4] ) ; #else strcpy ( filename , "..\\SYNTH.TXT" ) ; n_indep_vars = 7 ; strcpy ( depname , "SUM1234" ) ; nreps = 100 ; #endif _strupr ( depname ) ; /* These are used by MEM.CPP for runtime memory validation */ _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ; fp = fopen ( mem_file_name , "wt" ) ; if (fp == NULL) { // Should never happen printf ( "\nCannot open MEM.LOG file for writing!" ) ; return EXIT_FAILURE ; } fclose ( fp ) ; mem_keep_log = 0 ; // Change this to 1 to keep a memory use log (slows execution!) mem_max_used = 0 ; /* Open the text file to which results will be written */ fp = fopen ( "MI_ONLY.LOG" , "wt" ) ; if (fp == NULL) { // Should never happen printf ( "\nCannot open MI_ONLY.LOG file for writing!" ) ; return EXIT_FAILURE ; } /* Read the file and locate the index of the dependent variable */ if (readfile ( filename , &nvars , &names , &ncases , &data )) return EXIT_FAILURE ; for (idep=0 ; idep<nvars ; idep++) { if (! strcmp ( depname , names[idep] )) break ; } if (idep == nvars) { printf ( "\nERROR... Dependent variable %s is not in file", depname ) ; return EXIT_FAILURE ; } if (idep < n_indep_vars) { printf ( "\nERROR... Dependent variable %s must be beyond independent vars", depname ) ; return EXIT_FAILURE ; } /* Check each variable for ties. This is not needed for the algorithm, but it is good to warn the user, because more than a very few tied values in any variable seriously degrades performance of the adaptive partitioning algorithm. */ MEMTEXT ( "MI_ONLY: Work" ) ; work = (double *) MALLOC ( ncases * sizeof(double) ) ; assert ( work != NULL ) ; ties = 0 ; assert ( work != NULL ) ; for (ivar=0 ; ivar<nvars ; ivar++) { if (ivar > n_indep_vars && ivar != idep) continue ; // Check only the variables selected by the user for (i=0 ; i<ncases ; i++) work[i] = data[i*nvars+ivar] ; qsortd ( 0 , ncases-1 , work ) ; nties = 0 ; for (i=1 ; i<ncases ; i++) { if (work[i] == work[i-1]) ++nties ; } if ((double) nties / (double) ncases > 0.05) { ++ties ; fprintf ( fp , "\nWARNING... %s has %.2lf percent ties!", names[ivar], 100.0 * nties / (double) ncases ) ; } } // For all variables if (ties) { fprintf ( fp , "\nThe presence of ties will seriously degrade" ) ; fprintf ( fp , "\nperformance of the adaptive partitioning algorithm\n\n" ) ; } /* Allocate scratch memory and create the MutualInformation object using the dependent variable crits - Mutual information criterion index - Indices that sort the criterion save_info - Ditto, this is univariate information, to be sorted mi_adapt - The MutualInformation object, constructed with the 'dependent' variable */ MEMTEXT ( "MI_ONLY work allocs plus MutualInformation" ) ; crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( crits != NULL ) ; index = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( index != NULL ) ; mcpt_max_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( mcpt_max_counts != NULL ) ; mcpt_same_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( mcpt_same_counts != NULL ) ; mcpt_solo_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( mcpt_solo_counts != NULL ) ; save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( save_info != NULL ) ; for (irep=0 ; irep<nreps ; irep++) { for (i=0 ; i<ncases ; i++) // Get the 'dependent' variable work[i] = data[i*nvars+idep] ; // Shuffle dependent variable if in permutation run (irep>0) if (irep) { // If doing permuted runs, shuffle i = ncases ; // Number remaining to be shuffled while (i > 1) { // While at least 2 left to shuffle j = (int) (unifrand () * i) ; if (j >= i) j = i - 1 ; dtemp = work[--i] ; work[i] = work[j] ; work[j] = dtemp ; } } // Here we use a tiny split theshold (instead of the usual 6.0) so that it picks up // small amounts of mutual information (perhaps including noise). // If we used 6.0, nearly all permutations of any reasonably sized dataset // would have a computed mutual information of zero. It's safe picking up // some noise because the permutation test will account for this. mi_adapt = new MutualInformationAdaptive ( ncases , work , 1 , 0.1 ) ; // Deliberately tiny for low information assert ( mi_adapt != NULL ) ; /* Compute and save the mutual information for the dependent variable with each individual independent variable candidate. */ for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates for (i=0 ; i<ncases ; i++) work[i] = data[i*nvars+icand] ; criterion = mi_adapt->mut_inf ( work , 1 ) ; save_info[icand] = criterion ; // We will sort this when all candidates are done if (irep == 0) { // If doing original (unpermuted), save criterion index[icand] = icand ; // Will need original indices when criteria are sorted crits[icand] = criterion ; mcpt_max_counts[icand] = mcpt_same_counts[icand] = mcpt_solo_counts[icand] = 1 ; // This is >= itself so count it now } else { if (criterion >= crits[icand]) ++mcpt_solo_counts[icand] ; } } // Initial list of all candidates delete mi_adapt ; mi_adapt = NULL ; if (irep == 0) // Find the indices that sort the candidates per criterion qsortdsi ( 0 , n_indep_vars-1 , save_info , index ) ; else { qsortd ( 0 , n_indep_vars-1 , save_info ) ; for (icand=0 ; icand<n_indep_vars ; icand++) { if (save_info[icand] >= crits[index[icand]]) ++mcpt_same_counts[index[icand]] ; if (save_info[n_indep_vars-1] >= crits[index[icand]]) // Valid only for largest ++mcpt_max_counts[index[icand]] ; } } } // For all reps fprintf ( fp , "\nAdaptive partitioning mutual information of %s", depname); fprintf ( fp , "\n" ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\nPredictors, in order of decreasing mutual information" ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\n Variable Information Solo pval Min pval Max pval" ) ; for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates k = index[n_indep_vars-1-icand] ; // Index of sorted candidate fprintf ( fp , "\n%31s %11.5lf %12.4lf %10.4lf %10.4lf", names[k], crits[k], (double) mcpt_solo_counts[k] / nreps, (double) mcpt_same_counts[k] / nreps, (double) mcpt_max_counts[k] / nreps ) ; } MEMTEXT ( "MI_ONLY: Finish" ) ; fclose ( fp ) ; FREE ( work ) ; FREE ( crits ) ; FREE ( index ) ; FREE ( mcpt_max_counts ) ; FREE ( mcpt_same_counts ) ; FREE ( mcpt_solo_counts ) ; FREE ( save_info ) ; free_data ( nvars , names , data ) ; MEMCLOSE () ; printf ( "\n\nPress any key..." ) ; _getch () ; return EXIT_SUCCESS ; }
int main ( int argc , // Number of command line arguments (includes prog name) char *argv[] // Arguments (prog name is argv[0]) ) { /* Declarations of local variables */ /* User's command control line related variables are here. Control_file_number and control_files permit nesting of 'CONTROL' commands. If control_file_number equals -1, control commands are read from stdin. Otherwise they are read from that file in FILE *control_files. Up to MAX_CONTROL_FILES can be stacked. */ int control_file_number = -1 ; // Stack pointer for control files FILE *control_files[MAX_CONTROL_FILES] ; // This is the stack char *control_line ; // User's commands here char *command, *rest ; // Pointers to its command and parameter parts int n_command, n_rest ; // Lengths of those parts /* These are network parameters which may be set by the user via commands. They are initialized to defaults which indicate that the user has not yet set them. As they are set, their current values are placed here. When learning is done for a network, their values are copied from here into the network object. When a network is read, the object's values are copied from it to here. Otherwise, these variables are not used; the values in the network object itself are used. The only purpose of these variables is to keep track of current values. */ int net_model = -1 ; // Network model (see NETMOD_? in CONST.H) int out_model = -1 ; // Output model (see OUTMOD_? in CONST.H) int n_inputs = -1 ; // Number of input neurons int n_outputs = -1 ; // Number of output neurons int n_hidden1 = -1 ; // Number of hidden layer one neurons int n_hidden2 = -1 ; // Ditto layer 2 (0 if just one hidden layer) TrainingSet *tset = NULL ; // Training set here Network *network = NULL ; // Network here struct LearnParams learn_params ; // General learning parameters struct AnnealParams anneal_params ; // Simulated annealing parameters struct GenInitParams geninit_params ; // Genetic initialization parameters struct KohParams koh_params ; // Kohonen parameters int classif_output = -1 ; // Current class (0=reject) for classif training char out_file[80] = "" ; // File for EXECUTE output double threshold ; // CLASSIFY confusion reject cutoff /* Miscellaneous variables */ int i, n, m ; double p ; char *msg ; FILE *fp ; /* -------------------------------------------------------------------------------- Program starts here. Verify that a careless user didn't fail to set the integer size correctly when compiling. -------------------------------------------------------------------------------- */ #if VERSION_16_BIT if (sizeof(int) > 2) { printf ( "\nRecompile with VERSION_16_BIT set to 0 in CONST.H" ) ; exit ( 1 ) ; } #else if (sizeof(int) < 4) { printf ( "\nRecompile with VERSION_16_BIT set to 1 in CONST.H" ) ; exit ( 1 ) ; } #endif printf ( "\nNEURAL - Program to train and test neural networks" ) ; printf("\nCopyright (c) 1993 by Academic Press, Inc."); printf("\nAll rights reserved. Permission is hereby granted, until further notice,"); printf("\nto make copies of this diskette, which are not for resale, provided these"); printf("\ncopies are made from this master diskette only, and provided that the"); printf("\nfollowing copyright notice appears on the diskette label:"); printf("\n(c) 1993 by Academic Press, Inc."); printf("\nExcept as previously stated, no part of the computer program embodied in"); printf("\nthis diskette may be reproduced or transmitted in any form or by any means,"); printf("\nelectronic or mechanical, including input into storage in any information"); printf("\nsystem for resale, without permission in writing from the publisher."); printf("\nProduced in the United States of America."); printf("\nISBN 0-12-479041-0"); /* Process command line parameters */ mem_name[0] = 0 ; // Default is no memory allocation file for (i=1 ; i<argc ; i++) { // Process all command line args str_to_upr ( argv[i] ) ; // Easier if all upper case if (! strcmp ( argv[i] , "/DEBUG" )) { sscanf ( argv[++i] , "%s" , mem_name ) ; if ((strlen ( mem_name ) > 1) || ! isalpha ( mem_name[0] )) { printf ( "\nIllegal DEBUG drive (%s); must be 1 letter." ) ; exit ( 1 ) ; } continue ; } printf ( "\nIllegal command line parameter (%s)", argv[i] ) ; exit ( 1 ) ; } /* Initialize memory allocation debugging */ if (strlen ( mem_name )) { strcat ( mem_name , ":mem.log" ) ; fp = fopen ( mem_name , "wt" ) ; if (fp == NULL) { printf ( "\nCannot open debugging file %s", mem_name ) ; exit ( 1 ) ; } fclose ( fp ) ; mem_log = 1 ; } else mem_log = 0 ; mem_used = 0 ; /* Initialize defaults */ learn_params.init = -1 ; learn_params.quit_err = 0.0 ; learn_params.retries = 32767 ; anneal_params.temps0 = 3 ; anneal_params.temps = 4 ; anneal_params.iters0 = 50 ; anneal_params.iters = 20 ; anneal_params.setback0 = 50 ; anneal_params.setback = 20 ; anneal_params.start0 = 3.0 ; anneal_params.start = 4.0 ; anneal_params.stop0 = 1.0 ; anneal_params.stop = 0.02 ; geninit_params.pool = 50 ; geninit_params.gens = 3 ; geninit_params.climb = 0 ; geninit_params.overinit = 1.5 ; geninit_params.pcross = 0.8 ; geninit_params.pmutate = 0.0001 ; koh_params.normalization = 0 ; // 0=multiplicative, 1=Z koh_params.learn_method = 1 ; // 0=additive, 1=subtractive koh_params.rate = 0.4 ; // learning rate koh_params.reduction = 0.99 ; // learning rate reduction learn_params.ap = &anneal_params ; learn_params.gp = &geninit_params ; learn_params.kp = &koh_params ; act_func_init () ; // Initialize interpolation table for activation function MEMTEXT ( "NEURAL: control_line, msg" ) ; if (((control_line = (char *) MALLOC ( CONTROL_LINE_LENGTH+1 )) == NULL) || ((msg = (char *) MALLOC ( CONTROL_LINE_LENGTH+1 )) == NULL)) { printf ( "\nInsufficient memory" ) ; exit ( 1 ) ; } /* Main loop processes all commands */ for (;;) { get_control_line ( control_line , &control_file_number, control_files ) ; split_control_line ( control_line , &command , &n_command , &rest , &n_rest ) ; if (! n_command) { if (n_rest) { sprintf ( msg , "No colon after command: %s", rest ) ; error_message ( msg ) ; } continue ; } sprintf ( msg , "%s : %s", command, rest ) ; normal_message ( msg ) ; /* Act on the command */ if (! strcmp ( command , "QUIT" )) break ; if (! strcmp ( command , "CONTROL" )) { stack_control_file ( rest , &control_file_number , control_files ) ; continue ; } if (! strcmp ( command , "NETWORK MODEL" )) { if (! strcmp ( rest , "LAYER" )) n = NETMOD_LAYER ; else if (! strcmp ( rest , "KOHONEN" )) n = NETMOD_KOH ; else { sprintf ( msg , "Illegal NETWORK MODEL: %s", rest ) ; error_message ( msg ) ; continue ; } if (net_model == n) continue ; if (ok_to_clear_weights( &network )) { net_model = n ; learn_params.init = -1 ; } else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "OUTPUT MODEL" )) { if (! strcmp ( rest , "CLASSIFY" )) n = OUTMOD_CLASSIFY ; else if (! strcmp ( rest , "AUTO" )) n = OUTMOD_AUTO ; else if (! strcmp ( rest , "GENERAL" )) n = OUTMOD_GENERAL ; else { sprintf ( msg , "Illegal OUTPUT MODEL: %s", rest ) ; error_message ( msg ) ; continue ; } if (out_model == n) continue ; if ((ok_to_clear_tset( &tset )) && (ok_to_clear_weights( &network))) out_model = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N INPUTS" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n <= 0) || (n > MAX_INPUTS)) { sprintf ( msg , "Illegal N INPUTS: %s", rest ) ; error_message ( msg ) ; continue ; } if (n_inputs == n) continue ; if ((ok_to_clear_tset( &tset)) && (ok_to_clear_weights(&network))) n_inputs = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N OUTPUTS" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n <= 0) || (n > MAX_OUTPUTS)) { sprintf ( msg , "Illegal N OUTPUTS: %s", rest ) ; error_message ( msg ) ; continue ; } if (n_outputs == n) continue ; if ((ok_to_clear_tset( &tset)) && (ok_to_clear_weights(&network))) n_outputs = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N HIDDEN1" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n < 0) || (n > MAX_HIDDEN)) { sprintf ( msg , "Illegal N HIDDEN1: %s", rest ) ; error_message ( msg ) ; continue ; } if (n_hidden1 == n) continue ; if (ok_to_clear_weights( &network )) n_hidden1 = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "N HIDDEN2" )) { m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n < 0) || (n > MAX_HIDDEN)) { sprintf ( msg , "Illegal N HIDDEN2: %s", rest ) ; error_message ( msg ) ; continue ; } if (n && ! n_hidden1) { error_message ( "N HIDDEN2 must be 0 if N HIDDEN1 IS 0." ) ; continue ; } if (n_hidden2 == n) continue ; if (ok_to_clear_weights( &network )) n_hidden2 = n ; else warning_message ( "Command aborted" ) ; continue ; } if (! strcmp ( command , "TRAIN" )) { if ((out_model == OUTMOD_AUTO) && (n_outputs != n_inputs)) { warning_message ( "Setting N OUTPUTS = N INPUTS" ) ; n_outputs = n_inputs ; } if (out_model <= 0) error_message ( "TRAIN used before OUTPUT MODEL set." ) ; else if (n_inputs <= 0) error_message ( "TRAIN used before N INPUTS set." ) ; else if (n_outputs <= 0) error_message ( "TRAIN used before N OUTPUTS set." ) ; else if ((net_model != NETMOD_KOH) && (out_model == OUTMOD_CLASSIFY) && (classif_output < 0)) error_message( "CLASSIFY output mode but CLASSIFY OUTPUT not set."); else if ((net_model == NETMOD_KOH) && (out_model != OUTMOD_CLASSIFY)) error_message( "KOHONEN network requires CLASSIFY output mode."); else { if (tset == NULL) { MEMTEXT ( "NEURAL: new tset" ) ; tset = new TrainingSet ( out_model , n_inputs , n_outputs ) ; } tset->train ( rest , classif_output ) ; } continue ; } if (check_anneal ( command , rest , &anneal_params )) continue ; if (check_genetic ( command , rest , &geninit_params )) continue ; if (check_kohonen ( command , rest , &koh_params , &network )) continue ; if (check_learn_params ( command , rest , &learn_params , net_model )) continue ; if (! strcmp ( command , "LEARN" )) { if ((tset == NULL) || (tset->ntrain == 0)) { error_message ( "Cannot LEARN; No training set exists." ) ; continue ; } if ((net_model == NETMOD_KOH) && (out_model != OUTMOD_CLASSIFY)) { error_message( "KOHONEN network requires CLASSIFY output mode."); continue ; } if (learn_params.init < 0) { error_message( "Initialization method not set."); continue ; } if (network == NULL) { if (net_model == NETMOD_LAYER) { if (n_hidden1 < 0) { error_message ( "LEARN used before N HIDDEN1 set." ) ; continue ; } else if (n_hidden2 < 0) { error_message ( "LEARN used before N HIDDEN2 set." ) ; continue ; } else { MEMTEXT ( "NEURAL: new LayerNet" ) ; network = new LayerNet ( out_model , n_inputs , n_hidden1 , n_hidden2 , n_outputs , 1 , 1 ) ; } } else if (net_model == NETMOD_KOH) { MEMTEXT ( "NEURAL: new KohNet" ) ; network = new KohNet ( n_inputs , n_outputs , &koh_params , 1 , 1 ) ; } } if ((network == NULL) || (! network->ok)) { // Malloc failure? memory_message ( "to create network." ) ; if (network != NULL) { delete network ; network = NULL ; } continue ; } network->learn ( tset , &learn_params ) ; if (network->neterr > 0.999999) { // Indicates massive failure MEMTEXT ( "NEURAL: learn failure delete network" ) ; delete network ; network = NULL ; } else { sprintf ( msg , "Final error = %.4lf%% of max possible", 100.0 * network->neterr ) ; normal_message ( msg ) ; } continue ; } if (! strcmp ( command , "SAVE WEIGHTS" )) { if (network == NULL) error_message ( "There are no learned weights to save." ) ; else wt_save ( network , net_model , 0 , rest ) ; continue ; } if (! strcmp ( command , "RESTORE WEIGHTS" )) { if (network != NULL) { MEMTEXT ( "NEURAL: delete network for restore" ) ; delete network ; network = NULL ; } network = wt_restore ( rest , &net_model ) ; if (network == NULL) continue ; if (tset != NULL) { if ((tset->nin != network->nin) || (tset->nout != network->nout) || (tset->outmod != network->outmod)) { error_message ( "Network conflicts with existing training set."); continue ; } } out_model = network->outmod ; n_inputs = network->nin ; n_outputs = network->nout ; if (net_model == NETMOD_LAYER) { n_hidden1 = ((LayerNet*) network)->nhid1 ; n_hidden2 = ((LayerNet*) network)->nhid2 ; } if (net_model == NETMOD_KOH) koh_params.normalization = ((KohNet *) network)->normalization ; learn_params.init = -1 ; continue ; } if (! strcmp ( command , "CLEAR TRAINING" )) { if (tset != NULL) { MEMTEXT ( "NEURAL: delete tset" ) ; delete tset ; tset = NULL ; } continue ; } if (! strcmp ( command , "CLEAR WEIGHTS" )) { if (network != NULL) { MEMTEXT ( "NEURAL: delete network" ) ; delete network ; network = NULL ; } continue ; } if (! strcmp ( command , "CLASSIFY OUTPUT" )) { if (net_model == NETMOD_KOH) { error_message ( "Cannot specify output for KOHONEN model." ) ; continue ; } if (n_outputs < 0) { error_message ( "CLASSIFY OUTPUT used before N OUTPUTS set." ) ; continue ; } if (out_model != OUTMOD_CLASSIFY) { error_message ( "CLASSIFY OUTPUT only valid when OUTPUT MODEL:CLASSIFY" ) ; continue ; } m = sscanf ( rest , "%d" , &n ) ; if ((m <= 0) || (n < 0)) { sprintf ( msg , "Illegal CLASSIFY OUTPUT: %s", rest ) ; error_message ( msg ) ; } else if (n > n_outputs) { sprintf ( msg , "CLASSIFY OUTPUT (%d) exceeds N OUTPUTS (%d)", n, n_outputs ) ; error_message ( msg ) ; } else classif_output = n ; continue ; } if (! strcmp ( command , "OUTPUT FILE" )) { strcpy ( out_file , rest ) ; continue ; } if (! strcmp ( command , "EXECUTE" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else network->execute_from_file ( rest , out_file ) ; continue ; } if (! strcmp ( command , "CLASSIFY" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else if (out_model != OUTMOD_CLASSIFY) error_message ( "CLASSIFY valid only in CLASSIFY output mode" ) ; else network->classify_from_file ( rest , threshold ) ; continue ; } if (! strcmp ( command , "RESET CONFUSION" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else network->reset_confusion () ; continue ; } if (! strcmp ( command , "CONFUSION THRESHOLD" )) { p = atof ( rest ) ; if ((p < 0.0) || (p > 100.0)) { sprintf ( msg , "Illegal CONFUSION THRESHOLD: %s", rest ) ; error_message ( msg ) ; } else threshold = p / 100.0 ; continue ; } if (! strcmp ( command , "SHOW CONFUSION" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else if (out_model != OUTMOD_CLASSIFY) error_message ( "CONFUSION valid only in CLASSIFY output mode" ) ; else network->show_confusion () ; continue ; } if (! strcmp ( command , "SAVE CONFUSION" )) { if (network == NULL) error_message ( "There is no trained network" ) ; else if (out_model != OUTMOD_CLASSIFY) error_message ( "CONFUSION valid only in CLASSIFY output mode" ) ; else network->save_confusion ( rest ) ; continue ; } sprintf ( msg , "Unknown command: %s", command ) ; error_message ( msg ) ; } // Endless command loop MEMTEXT ( "NEURAL: control_line, msg" ) ; FREE ( control_line ) ; FREE ( msg ) ; MEMCLOSE () ; return 0 ; }
int main ( int argc , // Number of command line arguments (includes prog name) char *argv[] // Arguments (prog name is argv[0]) ) { int i, j, k, nvars, ncases, irep, nreps, nbins, nbins_dep, nbins_indep, *count ; int n_indep_vars, idep, icand, *index, *mcpt_max_counts, *mcpt_same_counts, *mcpt_solo_counts ; short int *bins_dep, *bins_indep ; double *data, *work, dtemp, *save_info, criterion, *crits ; double *ab, *bc, *b ; char filename[256], **names, depname[256] ; FILE *fp ; /* Process command line parameters */ #if 1 if (argc != 6) { printf ( "\nUsage: TRANSFER datafile n_indep depname nreps" ) ; printf ( "\n datafile - name of the text file containing the data" ) ; printf ( "\n The first line is variable names" ) ; printf ( "\n Subsequent lines are the data." ) ; printf ( "\n Delimiters can be space, comma, or tab" ) ; printf ( "\n n_indep - Number of independent vars, starting with the first" ) ; printf ( "\n depname - Name of the 'dependent' variable" ) ; printf ( "\n It must be AFTER the first n_indep variables" ) ; printf ( "\n nbins - Number of bins for all variables" ) ; printf ( "\n nreps - Number of Monte-Carlo permutations, including unpermuted" ) ; exit ( 1 ) ; } strcpy ( filename , argv[1] ) ; n_indep_vars = atoi ( argv[2] ) ; strcpy ( depname , argv[3] ) ; nbins = atoi ( argv[4] ) ; nreps = atoi ( argv[5] ) ; #else strcpy ( filename , "..\\SYNTH.TXT" ) ; n_indep_vars = 7 ; strcpy ( depname , "SUM1234" ) ; nbins = 2 ; nreps = 1 ; #endif _strupr ( depname ) ; /* These are used by MEM.CPP for runtime memory validation */ _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ; fp = fopen ( mem_file_name , "wt" ) ; if (fp == NULL) { // Should never happen printf ( "\nCannot open MEM.LOG file for writing!" ) ; return EXIT_FAILURE ; } fclose ( fp ) ; mem_keep_log = 1 ; // Change this to 1 to keep a memory use log (slows execution!) mem_max_used = 0 ; /* Open the text file to which results will be written */ fp = fopen ( "TRANSFER.LOG" , "wt" ) ; if (fp == NULL) { // Should never happen printf ( "\nCannot open TRANSFER.LOG file for writing!" ) ; return EXIT_FAILURE ; } /* Read the file and locate the index of the dependent variable */ if (readfile ( filename , &nvars , &names , &ncases , &data )) return EXIT_FAILURE ; for (idep=0 ; idep<nvars ; idep++) { if (! strcmp ( depname , names[idep] )) break ; } if (idep == nvars) { printf ( "\nERROR... Dependent variable %s is not in file", depname ) ; return EXIT_FAILURE ; } if (idep < n_indep_vars) { printf ( "\nERROR... Dependent variable %s must be beyond independent vars", depname ) ; return EXIT_FAILURE ; } /* Allocate scratch memory crits - Transfer Entropy criterion index - Indices that sort the criterion save_info - Ditto, this is univariate criteria, to be sorted */ MEMTEXT ( "TRANSFER work allocs" ) ; work = (double *) MALLOC ( ncases * sizeof(double) ) ; assert ( work != NULL ) ; crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( crits != NULL ) ; index = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( index != NULL ) ; bins_indep = (short int *) MALLOC ( ncases * sizeof(short int) ) ; assert ( bins_indep != NULL ) ; bins_dep = (short int *) MALLOC ( ncases * sizeof(short int) ) ; assert ( bins_dep != NULL ) ; mcpt_max_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( mcpt_max_counts != NULL ) ; mcpt_same_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( mcpt_same_counts != NULL ) ; mcpt_solo_counts = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( mcpt_solo_counts != NULL ) ; save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( save_info != NULL ) ; count = (int *) MALLOC ( nbins * nbins * nbins * sizeof(int) ) ; assert ( count != NULL ) ; ab = (double *) MALLOC ( nbins * nbins * sizeof(double) ) ; assert ( ab != NULL ) ; bc = (double *) MALLOC ( nbins * nbins * sizeof(double) ) ; assert ( bc != NULL ) ; b = (double *) MALLOC ( nbins * sizeof(double) ) ; assert ( b != NULL ) ; /* Get the dependent variable and partition it */ for (i=0 ; i<ncases ; i++) // Get the 'dependent' variable work[i] = data[i*nvars+idep] ; nbins_dep = nbins ; partition ( ncases , work , &nbins_dep , NULL , bins_dep ) ; /* Replication loop is here */ for (irep=0 ; irep<nreps ; irep++) { /* Compute and save the transfer entropy of the dependent variable with each individual independent variable candidate. */ for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates for (i=0 ; i<ncases ; i++) work[i] = data[i*nvars+icand] ; // Shuffle independent variable if in permutation run (irep>0) if (irep) { // If doing permuted runs, shuffle i = ncases ; // Number remaining to be shuffled while (i > 1) { // While at least 2 left to shuffle j = (int) (unifrand () * i) ; if (j >= i) j = i - 1 ; dtemp = work[--i] ; work[i] = work[j] ; work[j] = dtemp ; } } nbins_indep = nbins ; partition ( ncases , work , &nbins_indep , NULL , bins_indep ) ; criterion = trans_ent ( ncases , nbins_indep , nbins_dep , bins_indep , bins_dep , 0 , 1 , 1 , count , ab , bc , b ) ; save_info[icand] = criterion ; // We will sort this when all candidates are done if (irep == 0) { // If doing original (unpermuted), save criterion index[icand] = icand ; // Will need original indices when criteria are sorted crits[icand] = criterion ; mcpt_max_counts[icand] = mcpt_same_counts[icand] = mcpt_solo_counts[icand] = 1 ; // This is >= itself so count it now } else { if (criterion >= crits[icand]) ++mcpt_solo_counts[icand] ; } } // Initial list of all candidates if (irep == 0) // Find the indices that sort the candidates per criterion qsortdsi ( 0 , n_indep_vars-1 , save_info , index ) ; else { qsortd ( 0 , n_indep_vars-1 , save_info ) ; for (icand=0 ; icand<n_indep_vars ; icand++) { if (save_info[icand] >= crits[index[icand]]) ++mcpt_same_counts[index[icand]] ; if (save_info[n_indep_vars-1] >= crits[index[icand]]) // Valid only for largest ++mcpt_max_counts[index[icand]] ; } } } // For all reps fprintf ( fp , "\nTransfer entropy of %s", depname); fprintf ( fp , "\n" ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\nPredictors, in order of decreasing transfer entropy" ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\n Variable Information Solo pval Min pval Max pval" ) ; for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates k = index[n_indep_vars-1-icand] ; // Index of sorted candidate fprintf ( fp , "\n%31s %11.5lf %12.4lf %10.4lf %10.4lf", names[k], crits[k], (double) mcpt_solo_counts[k] / nreps, (double) mcpt_same_counts[k] / nreps, (double) mcpt_max_counts[k] / nreps ) ; } MEMTEXT ( "TRANSFER: Finish" ) ; fclose ( fp ) ; FREE ( work ) ; FREE ( crits ) ; FREE ( index ) ; FREE ( bins_indep ) ; FREE ( bins_dep ) ; FREE ( mcpt_max_counts ) ; FREE ( mcpt_same_counts ) ; FREE ( mcpt_solo_counts ) ; FREE ( save_info ) ; FREE ( count ) ; FREE ( ab ) ; FREE ( bc ) ; FREE ( b ) ; free_data ( nvars , names , data ) ; MEMCLOSE () ; printf ( "\n\nPress any key..." ) ; _getch () ; return EXIT_SUCCESS ; }
int main ( int argc , // Number of command line arguments (includes prog name) char *argv[] // Arguments (prog name is argv[0]) ) { int i, j, k, nvars, ncases, ndiv, maxkept, ivar, nties, ties ; int n_indep_vars, idep, icand, iother, ibest, *sortwork, nkept, *kept ; double *data, *work ; double *save_info, *univar_info, *pair_info, bestredun, redun, bestcrit ; double criterion, relevance, redundancy, *crits, *reduns ; char filename[256], **names, depname[256] ; char trial_name[256], *pair_found ; FILE *fp ; MutualInformationParzen *mi_parzen ; MutualInformationAdaptive *mi_adapt ; /* Process command line parameters */ #if 1 if (argc != 6) { printf ( "\nUsage: MI_CONT datafile n_indep depname ndiv maxkept" ) ; printf ( "\n datafile - name of the text file containing the data" ) ; printf ( "\n The first line is variable names" ) ; printf ( "\n Subsequent lines are the data." ) ; printf ( "\n Delimiters can be space, comma, or tab" ) ; printf ( "\n n_indep - Number of independent vars, starting with the first" ) ; printf ( "\n depname - Name of the 'dependent' variable" ) ; printf ( "\n It must be AFTER the first n_indep variables" ) ; printf ( "\n ndiv - Normally zero, to employ adaptive partitioning" ) ; printf ( "\n Specify 5 (for very few cases) to 15 (for an" ) ; printf ( "\n enormous number of cases) to use Parzen windows" ) ; printf ( "\n maxkept - Stepwise will allow at most this many predictors" ) ; exit ( 1 ) ; } strcpy ( filename , argv[1] ) ; n_indep_vars = atoi ( argv[2] ) ; strcpy ( depname , argv[3] ) ; ndiv = atoi ( argv[4] ) ; maxkept = atoi ( argv[5] ) ; #else strcpy ( filename , "..\\VARS.TXT" ) ; n_indep_vars = 8 ; strcpy ( depname , "DAY_RETURN" ) ; ndiv = 0 ; maxkept = 5 ; #endif _strupr ( depname ) ; /* These are used by MEM.CPP for runtime memory validation */ _fullpath ( mem_file_name , "MEM.LOG" , 256 ) ; fp = fopen ( mem_file_name , "wt" ) ; if (fp == NULL) { // Should never happen printf ( "\nCannot open MEM.LOG file for writing!" ) ; return EXIT_FAILURE ; } fclose ( fp ) ; mem_keep_log = 1 ; mem_max_used = 0 ; /* Open the text file to which results will be written */ fp = fopen ( "MI_CONT.LOG" , "wt" ) ; if (fp == NULL) { // Should never happen printf ( "\nCannot open MI_CONT.LOG file for writing!" ) ; return EXIT_FAILURE ; } /* Read the file and locate the index of the 'dependent' variable */ if (readfile ( filename , &nvars , &names , &ncases , &data )) return EXIT_FAILURE ; for (idep=0 ; idep<nvars ; idep++) { if (! strcmp ( depname , names[idep] )) break ; } if (idep == nvars) { printf ( "\nERROR... Dependent variable %s is not in file", depname ) ; return EXIT_FAILURE ; } if (idep < n_indep_vars) { printf ( "\nERROR... Dependent variable %s must be beyond independent vars", depname ) ; return EXIT_FAILURE ; } /* If adaptive partitioning is specified, check each variable for ties. This is not needed for the algorithm, but it is good to warn the user, because more than a very few tied values in any variable seriously degrades performance of the adaptive partitioning algorithm. */ MEMTEXT ( "MI_CONT: Work" ) ; work = (double *) MALLOC ( ncases * sizeof(double) ) ; assert ( work != NULL ) ; if (ndiv == 0) { // If adaptive partitioning, check for ties ties = 0 ; assert ( work != NULL ) ; for (ivar=0 ; ivar<nvars ; ivar++) { if (ivar > n_indep_vars && ivar != idep) continue ; // Check only the variables selected by the user for (i=0 ; i<ncases ; i++) work[i] = data[i*nvars+ivar] ; qsortd ( 0 , ncases-1 , work ) ; nties = 0 ; for (i=1 ; i<ncases ; i++) { if (work[i] == work[i-1]) ++nties ; } if ((double) nties / (double) ncases > 0.05) { ++ties ; fprintf ( fp , "\nWARNING... %s has %.2lf percent ties!", names[ivar], 100.0 * nties / (double) ncases ) ; } } // For all variables if (ties) { fprintf ( fp , "\nThe presence of ties will seriously degrade" ) ; fprintf ( fp , "\nperformance of the adaptive partitioning algorithm\n\n" ) ; } } // If adaptive partitioning, so testing for ties in the data /* Allocate scratch memory and create the MutualInformation object using the dependent variable kept - Array of indices of variables kept so far crits - Ditto, criterion reduns - Ditto, redundancy sortwork - Temporary use for printing variable's information sorted save_info - Ditto, this is univariate information, to be sorted univar_info - Also univariate information, but not sorted, for use in stepwise pair_found - Flag: is there valid info in the corresponding element of the next array pair_info - Preserve pairwise information of indeps to avoid expensive recalculation mi_parzen - The MutualInformation object, constructed with the 'dependent' variable mi_adapt - Ditto, but used if adaptive partitioning */ MEMTEXT ( "MI_CONT 6 allocs plus MutualInformation" ) ; kept = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( kept != NULL ) ; crits = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( crits != NULL ) ; reduns = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( reduns != NULL ) ; sortwork = (int *) MALLOC ( n_indep_vars * sizeof(int) ) ; assert ( sortwork != NULL ) ; save_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( save_info != NULL ) ; univar_info = (double *) MALLOC ( n_indep_vars * sizeof(double) ) ; assert ( univar_info != NULL ) ; pair_found = (char *) MALLOC ( (n_indep_vars * (n_indep_vars+1) / 2) * sizeof(char) ) ; assert ( pair_found != NULL ) ; pair_info = (double *) MALLOC ( (n_indep_vars * (n_indep_vars+1) / 2) * sizeof(double) ) ; assert ( pair_info != NULL ) ; for (i=0 ; i<ncases ; i++) // Get the 'dependent' variable work[i] = data[i*nvars+idep] ; if (ndiv > 0) { mi_parzen = new MutualInformationParzen ( ncases , work , ndiv ) ; mi_adapt = NULL ; assert ( mi_parzen != NULL ) ; } else { mi_adapt = new MutualInformationAdaptive ( ncases , work , 0 , 6.0 ) ; mi_parzen = NULL ; assert ( mi_adapt != NULL ) ; } memset ( pair_found , 0 , (n_indep_vars * (n_indep_vars+1) / 2) * sizeof(char) ) ; if (ndiv > 0) fprintf ( fp , "\nParzen mutual information of %s (ndiv=%d)", depname, ndiv); else fprintf ( fp , "\nAdaptive partitioning mutual information of %s", depname); fprintf ( fp , "\n" ) ; fprintf ( fp , "\n---------------------------------------------------------------" ) ; fprintf ( fp , "\n" ) ; /* Compute and save the mutual information for the dependent variable with each individual independent variable candidate. Print the results, sort them, and print them again, this time sorted. */ fprintf ( fp , "\nInitial candidates, in order of appearance in data file" ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\n Variable Information" ) ; for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates for (i=0 ; i<ncases ; i++) work[i] = data[i*nvars+icand] ; if (ndiv > 0) criterion = mi_parzen->mut_inf ( work ) ; else criterion = mi_adapt->mut_inf ( work , 0 ) ; printf ( "\n%s = %.5lf", names[icand], criterion ) ; fprintf ( fp , "\n%31s %.5lf", names[icand], criterion ) ; sortwork[icand] = icand ; save_info[icand] = univar_info[icand] = criterion ; } // Initial list of all candidates if (mi_parzen != NULL) { delete mi_parzen ; mi_parzen = NULL ; } if (mi_adapt != NULL) { delete mi_adapt ; mi_adapt = NULL ; } fprintf ( fp , "\n" ) ; fprintf ( fp , "\nInitial candidates, in order of decreasing mutual information" ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\n Variable Information" ) ; qsortdsi ( 0 , n_indep_vars-1 , save_info , sortwork ) ; for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates k = sortwork[n_indep_vars-1-icand] ; // Index of sorted candidate fprintf ( fp , "\n%31s %.5lf", names[k], save_info[n_indep_vars-1-icand] ) ; } /* Initialize the 'kept' set to be the best variable, and then begin the main outer loop that adds variables one at a time */ kept[0] = sortwork[n_indep_vars-1] ; // Index of best single candidate crits[0] = save_info[n_indep_vars-1] ; reduns[0] = 0.0 ; nkept = 1 ; if (maxkept > n_indep_vars) // Guard against silly user maxkept = n_indep_vars ; while (nkept < maxkept) { fprintf ( fp , "\n" ) ; fprintf ( fp , "\nVariables so far Relevance Redundancy Criterion" ) ; for (i=0 ; i<nkept ; i++) fprintf ( fp , "\n%31s %10.5lf %10.5lf %10.5lf", names[kept[i]], crits[i] + reduns[i], reduns[i], crits[i] ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\nSearching for an additional candidate..." ) ; fprintf ( fp , "\n" ) ; fprintf ( fp , "\n Variable Relevance Redundancy Criterion" ) ; bestcrit = -1.e60 ; for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates for (i=0 ; i<nkept ; i++) { // Is this candidate already kept? if (kept[i] == icand) break ; } if (i < nkept) // If this candidate 'icand' is already kept continue ; // Skip it strcpy ( trial_name , names[icand] ) ; // Its name for printing for (i=0 ; i<ncases ; i++) // Get its cases work[i] = data[i*nvars+icand] ; if (ndiv > 0) { mi_parzen = new MutualInformationParzen ( ncases , work , ndiv ) ; mi_adapt = NULL ; assert ( mi_parzen != NULL ) ; } else { mi_adapt = new MutualInformationAdaptive ( ncases , work , 0 , 6.0 ) ; mi_parzen = NULL ; assert ( mi_adapt != NULL ) ; } relevance = univar_info[icand] ; // We saved it during initial printing printf ( "\n%s relevance = %.5lf", trial_name, relevance ) ; // Compute the redundancy of this candidate // This is the mean of its redundancy with all kept variables redundancy = 0.0 ; for (iother=0 ; iother<nkept ; iother++) { // Process entire kept set j = kept[iother] ; // Index of a variable in the kept set if (icand > j) // pair_found and pair_info are k = icand*(icand+1)/2+j ; // symmetric, so k is the index else // into them k = j*(j+1)/2+icand ; if (pair_found[k]) // If we already computed it redun = pair_info[k] ; // Don't do it again else { // First time for this pair, so compute for (i=0 ; i<ncases ; i++) // Get its cases work[i] = data[i*nvars+j] ; // Variable already in kept set if (ndiv > 0) redun = mi_parzen->mut_inf ( work ) ; else redun = mi_adapt->mut_inf ( work , 0 ) ; pair_found[k] = 1 ; // Flag that this pair has been computed pair_info[k] = redun ; // And save the MI for this pair } // Else must compute redundancy redundancy += redun ; printf ( "\n %s <-> %s redundancy = %.5lf", names[icand], names[j], redun ) ; } // For all kept variables, computing mean redundancy if (mi_parzen != NULL) { delete mi_parzen ; mi_parzen = NULL ; } if (mi_adapt != NULL) { delete mi_adapt ; mi_adapt = NULL ; } redundancy /= nkept ; // It is the mean across all kept printf ( "\nRedundancy = %.5lf", redundancy ) ; criterion = relevance - redundancy ; fprintf ( fp , "\n%31s %10.5lf %10.5lf %10.5lf", trial_name, relevance, redundancy, criterion ) ; if (criterion > bestcrit) { // Did we just set a new record? bestcrit = criterion ; // If so, update the record bestredun = redundancy ; // Needed for printing results later ibest = icand ; // Keep track of the winning candidate } } // For all candidates // We now have the best candidate if (bestcrit <= 0.0) break ; kept[nkept] = ibest ; crits[nkept] = bestcrit ; reduns[nkept] = bestredun ; ++nkept ; } // While adding new variables fprintf ( fp , "\n" ) ; fprintf ( fp , "\nFinal set Relevance Redundancy Criterion" ) ; for (i=0 ; i<nkept ; i++) fprintf ( fp , "\n%31s %10.5lf %10.5lf %10.5lf", names[kept[i]], crits[i] + reduns[i], reduns[i], crits[i] ) ; MEMTEXT ( "MI_CONT: Finish" ) ; fclose ( fp ) ; FREE ( work ) ; FREE ( kept ) ; FREE ( crits ) ; FREE ( reduns ) ; FREE ( sortwork ) ; FREE ( save_info ) ; FREE ( univar_info ) ; FREE ( pair_found ) ; FREE ( pair_info ) ; if (mi_parzen != NULL) delete mi_parzen ; if (mi_adapt != NULL) delete mi_adapt ; free_data ( nvars , names , data ) ; MEMCLOSE () ; printf ( "\n\nPress any key..." ) ; _getch () ; return EXIT_SUCCESS ; }