/* Serial (single-threaded) path: compute the entropy of one target word and
 * print the association measures for every context it co-occurs with. */
void calculate_ams_all_serial( word_count *casted_t, gpointer key_t ) {
    double count_t_c;
    gpointer key_c, value_t_c;
    word_count *casted_c;
    GHashTableIter iter_c;

    g_hash_table_iter_init( &iter_c, casted_t->links );
    casted_t->entropy = calculate_entropy( casted_t->count, casted_t->links );

    // Iterate over all contexts linked to this target
    while( g_hash_table_iter_next( &iter_c, &key_c, &value_t_c ) ) {
        casted_c = g_hash_table_lookup( c_dict, key_c );
        count_t_c = *((double *)value_t_c);  // co-occurrence count f(t,c)
        calculate_and_print_am( (int *)key_t, casted_t->id, (int *)key_c, casted_c->id,
                                count_t_c, casted_t->count, casted_c->count,
                                casted_t->entropy, casted_c->entropy );
    }
}
int main( int argc, char *argv[] ) {
    word_count *casted_c;
    GHashTableIter iter_c;
    gpointer key_c, value_t_c, value_t, key_t;

    if( argc != 2 ) {
        usage();
    }

    perr( "Reading input file into hashmap...\n" );
    read_input_file( argv[1] );

    // File header
    perr( "Calculating association scores...\n" );
    printf( "target\tid_target\tcontext\tid_context\tf_tc\tf_t\tf_c\t" );
    printf( "cond_prob\tpmi\tnpmi\tlmi\ttscore\tzscore\tdice\tchisquare\t" );
    printf( "loglike\taffinity\tentropy_target\tentropy_context\n" );

    // First calculate all entropies for contexts
    g_hash_table_iter_init( &iter_c, c_dict );
    while( g_hash_table_iter_next( &iter_c, &key_c, &value_t_c ) ) {
        casted_c = g_hash_table_lookup( c_dict, key_c );
        casted_c->entropy = calculate_entropy( casted_c->count, casted_c->links );
    }

    // Then process every target, either in parallel or serially
    g_hash_table_iter_init( &iter_t, t_dict );
    nb_targets = g_hash_table_size( t_dict );
    if( nb_threads > 1 ) {
        run_multi_threaded( &calculate_ams_all, nb_threads );
    }
    else {
        while( g_hash_table_iter_next( &iter_t, &key_t, &value_t ) ) {
            calculate_ams_all_serial( (word_count *)value_t, key_t );
            update_count();
        }
    }

    // Clean and free to avoid memory leaks
    perr( "Finished, cleaning up...\n" );
    g_hash_table_destroy( t_dict );
    g_hash_table_destroy( c_dict );
    // MUST be last to be destroyed, otherwise will destroy keys in previous dicts
    // and memory will leak from unreachable values
    g_hash_table_destroy( symbols_dict );
    g_hash_table_destroy( inv_symbols_dict ); // no effect

    perra( "Number of targets: %d\n", idc_t );
    perra( "Number of contexts: %d\n", idc_c );
    perr( "You can now calculate similarities with the command below\n" );
    perr( " ./calculate_similarity [OPTIONS] <out-file>\n\n" );
    return 0;
}
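The calculate_entropy helper called above is not shown in this listing. Below is a minimal sketch of what it could look like, assuming (not confirmed by the code above) that the first argument is the word's total co-occurrence count and that links maps each context key to a double* holding the pair count:

#include <glib.h>
#include <math.h>

/* Hypothetical sketch: Shannon entropy of a word's context distribution,
 * where p(context) = pair_count / total_count. Names and types are assumed. */
double calculate_entropy( double count, GHashTable *links ) {
    GHashTableIter iter;
    gpointer key, value;
    double entropy = 0.0;

    g_hash_table_iter_init( &iter, links );
    while( g_hash_table_iter_next( &iter, &key, &value ) ) {
        double p = *((double *)value) / count;  // relative frequency of this context
        if( p > 0.0 )
            entropy -= p * log( p );            // natural log; the base is a convention choice
    }
    return entropy;
}

In the code above, the result of such a helper is cached once per context and once per target in the entropy field, so it is computed only once per word rather than once per (target, context) pair.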
double calculate_entropy_file(PE_FILE *pe) {
    unsigned int byte_count[256] = {0};
    unsigned int n, size;
    unsigned char buffer[1024];

    n = size = 0;
    rewind(pe->handle);

    // Build a histogram of byte values over the whole file
    while ((n = fread(buffer, 1, sizeof(buffer), pe->handle)) != 0) {
        for (unsigned int i = 0; i < n; i++) {
            byte_count[buffer[i]]++;
            size++;
        }
    }

    return calculate_entropy(byte_count, size);
}
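Here calculate_entropy takes a byte-value histogram rather than a hash table of links, so it is a different helper from the one used in the association-score code above. A minimal sketch, assuming it returns Shannon entropy in bits per byte (between 0 and 8) over the 256 possible byte values:

#include <math.h>

/* Hypothetical sketch: Shannon entropy of a byte histogram.
 * byte_count[i] = occurrences of byte value i, size = total bytes counted. */
double calculate_entropy(unsigned int byte_count[256], unsigned int size)
{
    double entropy = 0.0;

    if (size == 0)
        return 0.0;  // empty file: treat entropy as zero

    for (int i = 0; i < 256; i++) {
        double p = (double) byte_count[i] / size;  // probability of byte value i
        if (p > 0.0)
            entropy -= p * log2(p);                // base-2 log -> bits per byte
    }
    return entropy;
}

Values close to 8 bits per byte indicate near-uniform byte distributions, which is why this kind of measure is commonly used to flag packed or encrypted executable sections.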