Example #1
0
/*
 * Serial worker for a single target entry: stores the target's entropy and
 * then calls calculate_and_print_am() once for every entry found in the
 * target's links table.
 *
 * casted_t - target entry; its entropy field is overwritten here.
 * key_t    - key associated with the target; forwarded as an int *.
 */
void calculate_ams_all_serial( word_count *casted_t, gpointer key_t ) {
  GHashTableIter link_iter;
  gpointer context_key, pair_value;

  g_hash_table_iter_init( &link_iter, casted_t->links );
  casted_t->entropy = calculate_entropy( casted_t->count, casted_t->links );

  while( g_hash_table_iter_next( &link_iter, &context_key, &pair_value ) ) {
    /* Each link key is resolved to its entry in the file-level c_dict table;
     * the link value is read as a double. */
    word_count *context = g_hash_table_lookup( c_dict, context_key );
    double pair_count = *((double *)pair_value);

    calculate_and_print_am( (int *)key_t, casted_t->id,
                            (int *)context_key, context->id,
                            pair_count, casted_t->count, context->count,
                            casted_t->entropy, context->entropy );
  }
}
Example #2
0
/*
 * Program entry point.
 *
 * Loads the input file named by argv[1] through read_input_file(), prints a
 * tab-separated column header on stdout, stores the entropy of every entry of
 * c_dict, then processes every entry of t_dict (through run_multi_threaded()
 * when nb_threads > 1, otherwise one by one in this thread) and finally
 * destroys the hash tables.
 *
 * Returns 0 on completion.
 */
int main( int argc, char *argv[] ) {
  word_count *casted_c;
  GHashTableIter iter_c;
  gpointer value_c, value_t, key_t;
  // NOTE(review): presumably usage() terminates the program; if it returns,
  // execution continues below with an unexpected argument count - confirm.
  if( argc != 2 ) { usage(); }
  perr( "Reading input file into hashmap...\n" );
  read_input_file( argv[1] ); 

  // File header
  perr( "Calculating association scores...\n" );
  printf( "target\tid_target\tcontext\tid_context\tf_tc\tf_t\tf_c\t" );
  printf( "cond_prob\tpmi\tnpmi\tlmi\ttscore\tzscore\tdice\tchisquare\t" );
  printf( "loglike\taffinity\tentropy_target\tentropy_context\n" );
  // First calculate all entropies for contexts.
  // The iterator already hands back the stored entry, so it is used directly
  // instead of looking every key up in c_dict a second time; the key itself
  // is not needed, hence NULL.
  g_hash_table_iter_init( &iter_c, c_dict );
  while( g_hash_table_iter_next( &iter_c, NULL, &value_c ) ){
    casted_c = (word_count *)value_c;
    casted_c->entropy = calculate_entropy(casted_c->count, casted_c->links);
  }
  g_hash_table_iter_init( &iter_t, t_dict );
  nb_targets = g_hash_table_size( t_dict );
  if( nb_threads > 1 ) {
    run_multi_threaded( &calculate_ams_all, nb_threads );
  }
  else {
    while( g_hash_table_iter_next( &iter_t, &key_t, &value_t ) ){
      calculate_ams_all_serial( (word_count *)value_t, key_t );
      update_count();
    }
  }
  // Clean and free to avoid memory leaks
  perr( "Finished, cleaning up...\n" );
  g_hash_table_destroy( t_dict );
  g_hash_table_destroy( c_dict );
  // symbols_dict MUST be destroyed after t_dict and c_dict, otherwise it will
  // destroy keys in those dicts and memory will leak from unreachable values
  g_hash_table_destroy( symbols_dict );   
  g_hash_table_destroy( inv_symbols_dict ); // no effect (per original note)
  
  perra( "Number of targets: %d\n", idc_t );
  perra( "Number of contexts: %d\n", idc_c );
  perr( "You can now calculate similarities with command below\n");
  perr( "  ./calculate_similarity [OPTIONS] <out-file>\n\n" );
  return 0;
}
Example #3
0
/*
 * Computes the entropy of the stream behind pe->handle.
 *
 * The stream is rewound, then read in fixed-size chunks while a histogram of
 * byte values and the total number of bytes read are accumulated. The result
 * is whatever calculate_entropy() returns for that histogram and total.
 */
double calculate_entropy_file(PE_FILE *pe)
{
	unsigned int histogram[256] = {0};
	unsigned int total = 0;
	unsigned int got = 0;
	unsigned char chunk[1024];

	rewind(pe->handle);

	for (;;) {
		got = fread(chunk, 1, sizeof chunk, pe->handle);
		/* A read that yields no bytes (end of file or error) ends the scan */
		if (got == 0)
			break;

		for (unsigned int k = 0; k < got; k++) {
			histogram[chunk[k]]++;
			total++;
		}
	}

	return calculate_entropy(histogram, total);
}