int main( int argc, char *argv[] ) { word_count *casted_c; GHashTableIter iter_c; //int *id_t, *id_c; gpointer key_c, value_t_c, value_t, key_t; if( argc != 2 ) { usage(); } perr( "Reading input file into hashmap...\n" ); read_input_file( argv[1] ); // File header perr( "Calculating association scores...\n" ); printf( "target\tid_target\tcontext\tid_context\tf_tc\tf_t\tf_c\t" ); printf( "cond_prob\tpmi\tnpmi\tlmi\ttscore\tzscore\tdice\tchisquare\t" ); printf( "loglike\taffinity\tentropy_target\tentropy_context\n" ); // First calculate all entropies for contexts g_hash_table_iter_init( &iter_c, c_dict ); while( g_hash_table_iter_next( &iter_c, &key_c, &value_t_c ) ){ casted_c = g_hash_table_lookup( c_dict, key_c ); casted_c->entropy = calculate_entropy(casted_c->count, casted_c->links); } g_hash_table_iter_init( &iter_t, t_dict ); nb_targets = g_hash_table_size( t_dict ); if( nb_threads > 1 ) { run_multi_threaded( &calculate_ams_all, nb_threads ); } else { while( g_hash_table_iter_next( &iter_t, &key_t, &value_t ) ){ calculate_ams_all_serial( (word_count *)value_t, key_t ); update_count(); } } // Clean and free to avoid memory leaks perr( "Finished, cleaning up...\n" ); g_hash_table_destroy( t_dict ); g_hash_table_destroy( c_dict ); // MUST be last to be destroyed, otherwise will destroy keys in previous dicts // and memory will leak from unreachable values g_hash_table_destroy( symbols_dict ); g_hash_table_destroy( inv_symbols_dict ); // no effect perra( "Number of targets: %d\n", idc_t ); perra( "Number of contexts: %d\n", idc_c ); perr( "You can now calculate similarities with command below\n"); perr( " ./calculate_similarity [OPTIONS] <out-file>\n\n" ); return 0; }
int main( int argc, char *argv[] ) { int argindex = treat_options( argc, argv ); if( argindex != argc - 2 ){ perr( "You must provide two filenames as arguments\n" ); usage(); } perr( "Reading input profiles into hashmap...\n" ); read_profiles_file( argv[ argindex + 1 ] ); perr( "Generalizing...\n" ); sim_file = open_file_read( argv[ argindex ] ); sim_index = get_index_column_name( sim_file, score_name ); if( sim_index < 4 ) { fprintf( stderr, "Column named \"%s\" not found!\nYou must ", score_name ); perr( "specify a valid -s option. Chose among SIM-FILE column headers.\n"); perr( "Remember that the 1st 4 fields cannot be used as scores.\n" ); usage(); } if( nb_threads > 1 ) { run_multi_threaded( &read_sim_and_generalize, nb_threads ); } else { perr( "Not using threads\n" ); read_sim_and_generalize_serial(); } // Clean and free to avoid memory leaks perr( "Finished, cleaning up...\n" ); fclose( sim_file ); g_hash_table_destroy( c_dict ); g_hash_table_destroy( t_dict ); if( t_filter ) { g_hash_table_destroy( t_filter ); } if( n_filter ) { g_hash_table_destroy( n_filter ); } if( c_filter ) { g_hash_table_destroy( c_filter ); } return 0; }
// Calls run_single_threaded() and then run_multi_threaded(), in that order.
// Takes no arguments and returns nothing.
//
// NOTE(review): other call sites in this source pass two arguments (a
// callback and a thread count) to run_multi_threaded; confirm that the
// argument-less call here matches its declaration.
void run(void) {
  run_single_threaded();
  run_multi_threaded();
}