int main(void)
{
    read_data();

    printf("\n");
    printf("== Number of students ==\n");
    printf("%d students\n", max_student);
    if (max_student <= 0) {
        printf("Too few data records.\n");
        return -1;
    }

    printf("\n");
    printf("== Roster in student-ID order ==\n");
    sort_by_id();
    print_data();

    printf("\n");
    printf("== Ranking by total score ==\n");
    sort_by_total();
    print_data();

    printf("\n");
    printf("== Average / highest / lowest scores ==\n");
    print_stat();

    return 0;
}
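/* For context, a minimal sketch of the supporting declarations the program
 * above assumes. All names and field layouts here are hypothetical; the real
 * read_data(), sort_by_id(), etc. are defined elsewhere in the source. */
#include <stdio.h>

#define MAX_STUDENTS 100

struct student {
    int id;      /* student ID (attendance number) */
    int total;   /* total score across subjects */
};

extern struct student students[MAX_STUDENTS];
extern int max_student;   /* number of records loaded by read_data() */

void read_data(void);     /* fill students[] and set max_student */
void sort_by_id(void);    /* ascending by student ID */
void sort_by_total(void); /* descending by total score */
void print_data(void);    /* print students[] in its current order */
void print_stat(void);    /* average, highest, and lowest scores */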
int main(int argc, char *argv[])
{
    char in[10];
    int id = 1;
    User *s;
    unsigned num_users;

    while (1) {
        printf("1. add user\n");
        printf("2. find user\n");
        printf("3. delete user\n");
        printf("4. delete all users\n");
        printf("5. sort items by name\n");
        printf("6. sort items by id\n");
        printf("7. print users\n");
        printf("8. count users\n");
        /* fgets replaces the unsafe gets(), which was removed in C11 */
        if (fgets(in, sizeof(in), stdin) == NULL) break;
        switch (atoi(in)) {
            case 1:
                printf("name?\n");
                if (fgets(in, sizeof(in), stdin) == NULL) break;
                in[strcspn(in, "\n")] = '\0'; /* strip trailing newline */
                add_user(id++, in);
                break;
            case 2:
                printf("id?\n");
                if (fgets(in, sizeof(in), stdin) == NULL) break;
                s = find_user(atoi(in));
                printf("user: %s\n", s ? s->name : "unknown");
                break;
            case 3:
                printf("id?\n");
                if (fgets(in, sizeof(in), stdin) == NULL) break;
                s = find_user(atoi(in));
                if (s) delete_user(s);
                else printf("id unknown\n");
                break;
            case 4: delete_all(); break;
            case 5: sort_by_name(); break;
            case 6: sort_by_id(); break;
            case 7: print_users(); break;
            case 8:
                num_users = HASH_COUNT(users);
                printf("there are %u users\n", num_users);
                break;
        }
    }
    return 0;
}
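/* A minimal sketch of the uthash-backed pieces the menu above relies on.
 * The struct layout and helper bodies are assumptions; only the uthash
 * macros (UT_hash_handle, HASH_ADD_INT, HASH_FIND_INT, HASH_COUNT) are the
 * library's documented API. */
#include <stdlib.h>
#include <string.h>
#include "uthash.h"

typedef struct {
    int id;             /* hash key */
    char name[10];
    UT_hash_handle hh;  /* makes this structure hashable */
} User;

User *users = NULL;     /* head of the hash table */

void add_user(int user_id, const char *name)
{
    User *s = malloc(sizeof(User));
    s->id = user_id;
    strncpy(s->name, name, sizeof(s->name) - 1);
    s->name[sizeof(s->name) - 1] = '\0';
    HASH_ADD_INT(users, id, s);   /* "id" names the key field */
}

User *find_user(int user_id)
{
    User *s;
    HASH_FIND_INT(users, &user_id, s);
    return s;                     /* NULL if not found */
}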
/* Sort comparison: primarily by name index, then by way properties,
   then by id as the final tie-breaker. */
static int sort_by_name_and_prop_and_id(WayX *a, WayX *b)
{
    int compare;
    index_t a_name = a->way.name;
    index_t b_name = b->way.name;

    if (a_name < b_name)
        return(-1);
    else if (a_name > b_name)
        return(1);

    compare = WaysCompare(&a->way, &b->way);

    if (compare)
        return(compare);

    return(sort_by_id(a, b));
}
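/* Hypothetical sketch of the final tie-breaker: the real sort_by_id() is
 * defined elsewhere in the source; this assumes WayX carries an index_t id
 * field and follows the same comparison pattern as above. */
static int sort_by_id(WayX *a, WayX *b)
{
    index_t a_id = a->id;
    index_t b_id = b->id;

    if (a_id < b_id)
        return(-1);
    else if (a_id > b_id)
        return(1);
    else
        return(0);
}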
int main(int argc, char **argv) {
    setlocale(LC_ALL, ""); // Comment-out on non-Posix systems
    clock_t time_start = clock();
    time_t time_t_start;
    time(&time_t_start);
    argv_0_basename = basename(argv[0]);
    get_usage_string(usage, USAGE_LEN); // This is a big scary string, so build it elsewhere
    //printf("sizeof(cmd_args)=%zd\n", sizeof(cmd_args));
    parse_cmd_args(argc, argv, usage, &cmd_args);

    if (cmd_args.class_algo == EXCHANGE || cmd_args.class_algo == EXCHANGE_BROWN)
        memusage += sizeof(float) * ENTROPY_TERMS_MAX; // We'll build the precomputed entropy terms after reporting memusage

    struct_model_metadata global_metadata;

    // The list of unique words should always include <s>, unknown word, and </s>
    map_update_count(&word_map, UNKNOWN_WORD, 0, 0); // Should always be first
    map_update_count(&word_map, "<s>", 0, 1);
    map_update_count(&word_map, "</s>", 0, 2);

    // Open input
    FILE *in_train_file = stdin;
    if (in_train_file_string)
        in_train_file = fopen(in_train_file_string, "r");
    if (in_train_file == NULL) {
        fprintf(stderr, "%s: Error: Unable to open input file %s\n", argv_0_basename, in_train_file_string); fflush(stderr);
        exit(15);
    }

    // Process input sentences
    size_t input_memusage = 0;
    const struct_model_metadata input_model_metadata = process_input(cmd_args, in_train_file, &word_map, &initial_bigram_map, &input_memusage);
    memusage += input_memusage;
    fclose(in_train_file);

    clock_t time_input_processed = clock();
    if (cmd_args.verbose >= -1) {
        fprintf(stderr, "%s: Corpus processed in %'.2f CPU secs. %'lu lines, %'u types, %'lu tokens, current memusage: %'.1fMB\n", argv_0_basename, (double)(time_input_processed - time_start)/CLOCKS_PER_SEC, input_model_metadata.line_count, input_model_metadata.type_count, input_model_metadata.token_count, (double)memusage / 1048576);
        fflush(stderr);
    }
    global_metadata.token_count = input_model_metadata.token_count;
    global_metadata.type_count  = map_count(&word_map);

    // Filter out infrequent words, reassign word_id's, and build a mapping from old word_id's to new word_id's
    sort_by_count(&word_map);
    word_id_t * restrict word_id_remap = calloc(input_model_metadata.type_count, sizeof(word_id_t));
    get_ids(&word_map, word_id_remap);
    word_id_t number_of_deleted_words = filter_infrequent_words(cmd_args, &global_metadata, &word_map, word_id_remap);

    // Get list of unique words
    char ** restrict word_list = (char **)malloc(sizeof(char*) * global_metadata.type_count);
    memusage += sizeof(char*) * global_metadata.type_count;
    reassign_word_ids(&word_map, word_list, word_id_remap);
    get_keys(&word_map, word_list);
    sort_by_id(&word_map);

    // Check or set number of classes
    if (cmd_args.num_classes >= global_metadata.type_count) { // User-set number of classes is at least as large as the vocabulary
        fprintf(stderr, "%s: Error: Number of classes (%u) is not less than vocabulary size (%u). Decrease the value of --classes\n", argv_0_basename, cmd_args.num_classes, global_metadata.type_count); fflush(stderr);
        exit(3);
    } else if (cmd_args.num_classes == 0) { // User did not manually set number of classes at all
        cmd_args.num_classes = (wclass_t)(sqrt(global_metadata.type_count) * 1.2);
    }

    // Build array of word_counts
    word_count_t * restrict word_counts = malloc(sizeof(word_count_t) * global_metadata.type_count);
    memusage += sizeof(word_count_t) * global_metadata.type_count;
    build_word_count_array(&word_map, word_list, word_counts, global_metadata.type_count);

    // Initialize clusters, and possibly read-in external class file
    wclass_t * restrict word2class = malloc(sizeof(wclass_t) * global_metadata.type_count);
    memusage += sizeof(wclass_t) * global_metadata.type_count;
    init_clusters(cmd_args, global_metadata.type_count, word2class, word_counts, word_list);
    if (initial_class_file != NULL)
        import_class_file(&word_map, word2class, initial_class_file, cmd_args.num_classes); // Overwrite subset of word mappings, from user-provided initial_class_file

    // Remap word_id's in initial_bigram_map
    remap_and_rev_bigram_map(&initial_bigram_map, &new_bigram_map, &new_bigram_map_rev, word_id_remap, map_find_id(&word_map, UNKNOWN_WORD, -1));

    global_metadata.start_sent_id = map_find_id(&word_map, "<s>", -1);  // need this for tallying emission probs
    global_metadata.end_sent_id   = map_find_id(&word_map, "</s>", -1); // need this for tallying emission probs
    global_metadata.line_count    = map_find_count(&word_map, "</s>");  // Used for calculating perplexity

    if (global_metadata.line_count == 0) {
        fprintf(stderr, "%s: Warning: Number of lines is 0. Include <s> and </s> in your ngram counts, or perplexity values will be unreliable.\n", argv_0_basename);
        fflush(stderr);
    }

    //printf("init_bigram_map hash_count=%u\n", HASH_COUNT(initial_bigram_map)); fflush(stdout);
    //printf("new_bigram_map hash_count=%u\n", HASH_COUNT(new_bigram_map)); fflush(stdout);
    free(word_id_remap);
    memusage -= sizeof(word_id_t) * input_model_metadata.type_count;
    delete_all(&word_map); // static
    delete_all_bigram(&initial_bigram_map); // static
    memusage -= input_memusage;

    // Initialize and set word bigram listing
    clock_t time_bigram_start = clock();
    size_t bigram_memusage = 0;
    size_t bigram_rev_memusage = 0;
    struct_word_bigram_entry * restrict word_bigrams = NULL;
    struct_word_bigram_entry * restrict word_bigrams_rev = NULL;

    if (cmd_args.verbose >= -1) {
        fprintf(stderr, "%s: Word bigram listing ... ", argv_0_basename);
        fflush(stderr);
    }

    #pragma omp parallel sections // Both bigram listing and reverse bigram listing can be done in parallel
    {
        #pragma omp section
        {
            //sort_bigrams(&new_bigram_map); // speeds things up later
            word_bigrams = calloc(global_metadata.type_count, sizeof(struct_word_bigram_entry));
            memusage += sizeof(struct_word_bigram_entry) * global_metadata.type_count;
            bigram_memusage = set_bigram_counts(word_bigrams, new_bigram_map);
            // Copy entries in word_counts to struct_word_bigram_entry.headword_count since that struct entry is already loaded when clustering
            for (word_id_t word = 0; word < global_metadata.type_count; word++)
                word_bigrams[word].headword_count = word_counts[word];
        }

        // Initialize and set *reverse* word bigram listing
        #pragma omp section
        {
            if (cmd_args.rev_alternate) { // Don't bother building this if it won't be used
                //sort_bigrams(&new_bigram_map_rev); // speeds things up later
                word_bigrams_rev = calloc(global_metadata.type_count, sizeof(struct_word_bigram_entry));
                memusage += sizeof(struct_word_bigram_entry) * global_metadata.type_count;
                bigram_rev_memusage = set_bigram_counts(word_bigrams_rev, new_bigram_map_rev);
                // Copy entries in word_counts to struct_word_bigram_entry.headword_count since that struct entry is already loaded when clustering
                for (word_id_t word = 0; word < global_metadata.type_count; word++)
                    word_bigrams_rev[word].headword_count = word_counts[word];
            }
        }
    }

    delete_all_bigram(&new_bigram_map);
    delete_all_bigram(&new_bigram_map_rev);
    memusage += bigram_memusage + bigram_rev_memusage;
    clock_t time_bigram_end = clock();
    if (cmd_args.verbose >= -1) {
        fprintf(stderr, "in %'.2f CPU secs. Bigram memusage: %'.1f MB\n", (double)(time_bigram_end - time_bigram_start)/CLOCKS_PER_SEC, (bigram_memusage + bigram_rev_memusage)/(double)1048576);
        fflush(stderr);
    }

    //print_word_bigrams(global_metadata, word_bigrams, word_list);

    // Build <v,c> counts, which consist of a word followed by a given class
    word_class_count_t * restrict word_class_counts = calloc(1 + cmd_args.num_classes * global_metadata.type_count, sizeof(word_class_count_t));
    if (word_class_counts == NULL) {
        fprintf(stderr, "%s: Error: Unable to allocate enough memory for <v,c>. %'.1f MB needed. Maybe increase --min-count\n", argv_0_basename, ((cmd_args.num_classes * global_metadata.type_count * sizeof(word_class_count_t)) / (double)1048576)); fflush(stderr);
        exit(13);
    }
    memusage += cmd_args.num_classes * global_metadata.type_count * sizeof(word_class_count_t);
    fprintf(stderr, "%s: Allocating %'.1f MB for word_class_counts: num_classes=%u x type_count=%u x sizeof(w-cl-count_t)=%zu\n", argv_0_basename, (double)(cmd_args.num_classes * global_metadata.type_count * sizeof(word_class_count_t)) / 1048576, cmd_args.num_classes, global_metadata.type_count, sizeof(word_class_count_t)); fflush(stderr);
    build_word_class_counts(cmd_args, word_class_counts, word2class, word_bigrams, global_metadata.type_count/*, word_list*/);
    //print_word_class_counts(cmd_args, global_metadata, word_class_counts);

    // Build reverse: <c,v> counts: class followed by word. This and the normal one are both pretty fast, so no need to parallelize this
    word_class_count_t * restrict word_class_rev_counts = NULL;
    if (cmd_args.rev_alternate) { // Don't bother building this if it won't be used
        word_class_rev_counts = calloc(1 + cmd_args.num_classes * global_metadata.type_count, sizeof(word_class_count_t));
        if (word_class_rev_counts == NULL) {
            fprintf(stderr, "%s: Warning: Unable to allocate enough memory for <c,v>. %'.1f MB needed. Falling back to --rev-alternate 0\n", argv_0_basename, ((cmd_args.num_classes * global_metadata.type_count * sizeof(word_class_count_t)) / (double)1048576)); fflush(stderr);
            cmd_args.rev_alternate = 0;
        } else {
            memusage += cmd_args.num_classes * global_metadata.type_count * sizeof(word_class_count_t);
            fprintf(stderr, "%s: Allocating %'.1f MB for word_class_rev_counts: num_classes=%u x type_count=%u x sizeof(w-cl-count_t)=%zu\n", argv_0_basename, (double)(cmd_args.num_classes * global_metadata.type_count * sizeof(word_class_count_t)) / 1048576, cmd_args.num_classes, global_metadata.type_count, sizeof(word_class_count_t)); fflush(stderr);
            build_word_class_counts(cmd_args, word_class_rev_counts, word2class, word_bigrams_rev, global_metadata.type_count/*, word_list*/);
        }
    }

    // Calculate memusage for count_arrays
    for (unsigned char i = 1; i <= cmd_args.max_array; i++) {
        memusage += 2 * (powi(cmd_args.num_classes, i) * sizeof(wclass_count_t));
        //printf("11 memusage += %zu (now=%zu) count_arrays\n", 2 * (powi(cmd_args.num_classes, i) * sizeof(wclass_count_t)), memusage); fflush(stdout);
    }

    clock_t time_model_built = clock();
    if (cmd_args.verbose >= -1) {
        fprintf(stderr, "%s: Finished loading %'lu tokens and %'u types (%'u filtered) from %'lu lines in %'.2f CPU secs\n", argv_0_basename, global_metadata.token_count, global_metadata.type_count, number_of_deleted_words, global_metadata.line_count, (double)(time_model_built - time_start)/CLOCKS_PER_SEC);
        fflush(stderr);
    }
    if (cmd_args.verbose >= -1) {
        fprintf(stderr, "%s: Approximate memory usage at clustering: %'.1fMB\n", argv_0_basename, (double)memusage / 1048576);
        fflush(stderr);
    }

    cluster(cmd_args, global_metadata, word_counts, word_list, word2class, word_bigrams, word_bigrams_rev, word_class_counts, word_class_rev_counts);

    // Now print the final word2class mapping
    if (cmd_args.verbose >= 0) {
        FILE *out_file = stdout;
        if (out_file_string)
            out_file = fopen(out_file_string, "w");
        if (out_file == NULL) {
            fprintf(stderr, "%s: Error: Unable to open output file %s\n", argv_0_basename, out_file_string); fflush(stderr);
            exit(16);
        }
        if (cmd_args.class_algo == EXCHANGE && (!cmd_args.print_word_vectors)) {
            print_words_and_classes(out_file, global_metadata.type_count, word_list, word_counts, word2class, (int)cmd_args.class_offset, cmd_args.print_freqs);
        } else if (cmd_args.class_algo == EXCHANGE && cmd_args.print_word_vectors) {
            print_words_and_vectors(out_file, cmd_args, global_metadata, word_list, word2class, word_bigrams, word_bigrams_rev, word_class_counts, word_class_rev_counts);
        }
        fclose(out_file);
    }

    clock_t time_clustered = clock();
    time_t time_t_end;
    time(&time_t_end);
    double time_secs_total = difftime(time_t_end, time_t_start);
    if (cmd_args.verbose >= -1)
        fprintf(stderr, "%s: Finished clustering in %'.2f CPU seconds. Total wall clock time was about %lim %lis\n", argv_0_basename, (double)(time_clustered - time_model_built)/CLOCKS_PER_SEC, (long)time_secs_total/60, ((long)time_secs_total % 60));

    free(word2class);
    free(word_bigrams);
    free(word_list);
    free(word_counts);
    exit(0);
}
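/* The count-array sizing loop above calls powi(). Below is a hypothetical
 * sketch of such an integer-power helper; the real powi() in the codebase may
 * differ in name, signature, or overflow handling. Each array order i costs
 * 2 * num_classes^i * sizeof(wclass_count_t) bytes, so this grows quickly
 * with --max-array. */
static size_t powi(size_t base, unsigned char exp)
{
    size_t result = 1;
    while (exp--)
        result *= base;   /* repeated multiplication; fine for small exp */
    return result;
}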