/**
 * Program entry point: loads the input file, prints a tab-separated header
 * to stdout, stores an entropy value on every entry of c_dict, then runs the
 * association-measure calculation over every entry of t_dict (threaded or
 * serial depending on nb_threads), and finally destroys the dictionaries.
 *
 * Relies on state declared outside this function: c_dict, t_dict,
 * symbols_dict, inv_symbols_dict, iter_t, nb_targets, nb_threads, idc_t and
 * idc_c.
 *
 * @param argc  Argument count; exactly one argument (the input file) is
 *              expected.
 * @param argv  Argument vector; argv[1] is handed to read_input_file().
 * @return 0 when the end of the function is reached.
 */
int main( int argc, char *argv[] ) {
  GHashTableIter iter_c;
  gpointer value_c, key_t, value_t;

  // NOTE(review): usage() is presumably expected to terminate the program;
  // if it returns, execution continues with whatever argv[1] holds - confirm.
  if( argc != 2 ) { usage(); }
  perr( "Reading input file into hashmap...\n" );
  read_input_file( argv[1] );

  // File header
  perr( "Calculating association scores...\n" );
  printf( "target\tid_target\tcontext\tid_context\tf_tc\tf_t\tf_c\t" );
  printf( "cond_prob\tpmi\tnpmi\tlmi\ttscore\tzscore\tdice\tchisquare\t" );
  printf( "loglike\taffinity\tentropy_target\tentropy_context\n" );

  // First calculate all entropies for contexts. The iterator already hands
  // back each stored value, so no extra lookup by key is needed; the key
  // out-parameter is optional and is passed as NULL because it is unused.
  g_hash_table_iter_init( &iter_c, c_dict );
  while( g_hash_table_iter_next( &iter_c, NULL, &value_c ) ){
    word_count *context = value_c;
    context->entropy = calculate_entropy( context->count, context->links );
  }

  // iter_t is initialised before either branch below; the serial branch
  // consumes it directly.
  // NOTE(review): the threaded branch presumably consumes iter_t inside
  // calculate_ams_all - confirm.
  g_hash_table_iter_init( &iter_t, t_dict );
  nb_targets = g_hash_table_size( t_dict );
  if( nb_threads > 1 ) {
    run_multi_threaded( &calculate_ams_all, nb_threads );
  }
  else {
    while( g_hash_table_iter_next( &iter_t, &key_t, &value_t ) ){
      calculate_ams_all_serial( (word_count *)value_t, key_t );
      update_count();
    }
  }

  // Clean and free to avoid memory leaks
  perr( "Finished, cleaning up...\n" );
  g_hash_table_destroy( t_dict );
  g_hash_table_destroy( c_dict );
  // MUST be last to be destroyed, otherwise will destroy keys in previous dicts
  // and memory will leak from unreachable values
  g_hash_table_destroy( symbols_dict );
  g_hash_table_destroy( inv_symbols_dict ); // no effect

  perra( "Number of targets: %d\n", idc_t );
  perra( "Number of contexts: %d\n", idc_c );
  perr( "You can now calculate similarities with command below\n");
  perr( "  ./calculate_similarity [OPTIONS] <out-file>\n\n" );
  return 0;
}
Exemple #2
0
/**
 * Program entry point: parses options, loads the profiles file, opens the
 * similarity file, locates the score column named by score_name, runs the
 * generalization step (threaded or serial depending on nb_threads), then
 * closes the file and destroys the dictionaries and any filters in use.
 *
 * Exactly two file names must follow the options: the similarity file
 * first, then the profiles file.
 *
 * Relies on state declared outside this function: sim_file, sim_index,
 * score_name, nb_threads, c_dict, t_dict, t_filter, n_filter and c_filter.
 *
 * @param argc  Argument count.
 * @param argv  Argument vector.
 * @return 0 when the end of the function is reached.
 */
int main( int argc, char *argv[] ) {
  // The user-facing message below states that the first 4 fields cannot be
  // used as scores, so any column index under this bound is rejected.
  enum { FIRST_SCORE_COLUMN = 4 };

  // Index of the first positional (non-option) argument.
  const int first_file = treat_options( argc, argv );

  if( argc - 2 != first_file ) {
    perr( "You must provide two filenames as arguments\n" );
    usage();
  }

  perr( "Reading input profiles into hashmap...\n" );
  read_profiles_file( argv[first_file + 1] );

  perr( "Generalizing...\n" );
  sim_file = open_file_read( argv[first_file] );
  sim_index = get_index_column_name( sim_file, score_name );
  if( sim_index < FIRST_SCORE_COLUMN ) {
    fprintf( stderr, "Column named \"%s\" not found!\nYou must ", score_name );
    perr( "specify a valid -s option. Chose among SIM-FILE column headers.\n");
    perr( "Remember that the 1st 4 fields cannot be used as scores.\n" );
    usage();
  }

  if( nb_threads <= 1 ) {
    perr( "Not using threads\n" );
    read_sim_and_generalize_serial();
  }
  else {
    run_multi_threaded( &read_sim_and_generalize, nb_threads );
  }

  // Release everything acquired above.
  perr( "Finished, cleaning up...\n" );
  fclose( sim_file );
  g_hash_table_destroy( c_dict );
  g_hash_table_destroy( t_dict );
  // The filters are only destroyed when they are non-NULL.
  if( t_filter != NULL ) {
    g_hash_table_destroy( t_filter );
  }
  if( n_filter != NULL ) {
    g_hash_table_destroy( n_filter );
  }
  if( c_filter != NULL ) {
    g_hash_table_destroy( c_filter );
  }
  return 0;
}
Exemple #3
0
 /* Runs the single-threaded variant first, then the multi-threaded one. */
 void run()
 {
    run_single_threaded();
    run_multi_threaded();
 }