/** * @brief Opens a dictionary file and returns the respective Dictionary object * * @param name name of the file containing the dictionary * @return the Dictionary object or NULL in case of error. */ Dictionary* dictionary_open(const char *name) { Dictionary *dic; FILE *gzf; gzf = gzopen(name, "rb"); if (!gzf) report_error("error opening file %s for reading.\n", name); dic = dictionary_load(gzf); gzclose(gzf); return dic; }
/**
  Tests loading of the dictionary tree.
  @param state Test environment.
  */
static void dictionary_load_test(void** state)
{
    /* Queue the serialized input on the I/O mock before loading
     * (presumably this is what dictionary_load() then reads -- confirm). */
    push_word_to_io_mock(L"ciupagą*^^^^^^^\n13\na*b*3*2\n");

    struct dictionary *loaded = dictionary_load(stdin);
    pop_remaining_chars();

    /* The load must succeed, contain the word, and report cost 13 for 2. */
    assert_non_null(loaded);
    assert_true(dictionary_find(loaded, L"ciupagą"));
    assert_int_equal(dictionary_hints_max_cost(loaded, 2), 13);

    dictionary_done(loaded);
}
//TODO - create a test with berlarikah, to test return suffix char *all_tests() { mu_suite_start(); dictionary_load(dictionary_fullpath("data/kata-dasar.txt")); mu_run_test(test_is_plural); mu_run_test(test_plural_parts); mu_run_test(test_stem_plural_word_when_both_words_are_root_words_and_the_same); mu_run_test(test_stem_plural_word_when_one_word_has_suffixes); return NULL; }
/**
 * Loads a dictionary from a path.
 * In case of a system error, the system error information is printed too.
 * @param [in] path Path.
 * @param [out] dict Pointer to the pointer under which the pointer to the
 *                   loaded dictionary is stored.
 *                   The dictionary must be released by the caller.
 * @return 0 when there is no error, a non-zero value in case of error.
 */
int load_dictionary(const char * path, struct dictionary ** dict)
{
    FILE * file = fopen(path, "r");
    if (NULL == file)
    {
        /* Capture errno right away: the reporting call below may change it
         * before it is returned. */
        const int saved_errno = errno;
        error(0, saved_errno, "Error reading dict from path %s", path);
        /* Honour the "non-zero on error" contract even if errno was 0. */
        return saved_errno != 0 ? saved_errno : -1;
    }

    *dict = dictionary_load(file);
    fclose(file);

    if (NULL == *dict)
        return -1;
    return 0;
}
/**
  The main function.
  Main function of the dict-check program.

  Accepts one or two arguments: an optional "-v" followed by the path of the
  dictionary file. The text returned by read_input() is then walked character
  by character: non-alphabetic characters are printed unchanged, alphabetic
  ones are handed to parse_word().
  */
int main(int argc, const char **argv)
{
    setlocale(LC_ALL, "pl_PL.UTF-8");

    if(!(argc >= 2 && argc <= 3))
    {
        fwprintf(stderr, L"Błędna liczba argumentów!\n");
        usage();
    }
    /* With two arguments the first one has to be exactly "-v". */
    if(argc == 3 &&
       !(argv[1][0] == '-' && argv[1][1] == 'v' && argv[1][2] == '\0'))
    {
        fwprintf(stderr, L"Błędny argument!\n");
        usage();
    }

    /* The dictionary path is always the last argument. */
    const char *dict_path = argv[argc - 1];
    FILE *dict_file = fopen(dict_path, "r");
    if(dict_file == NULL)
    {
        fwprintf(stderr, L"Nie udało się załadować pliku %s!\n", dict_path);
        usage();
    }

    bool verbose = (argc == 3);

    struct dictionary *dict = dictionary_load(dict_file);
    fclose(dict_file);
    if(NULL == dict)
    {
        fwprintf(stderr, L"Nie udało się załadować pliku %s!\n", dict_path);
        usage();
    }

    vector *input = read_input();
    int line_number = 1;
    int char_number = 1;
    int index = 0;

    while(index < vector_size(input))
    {
        wchar_t current = ((wchar_t *)vector_content(input))[index];

        /* A newline starts a new line; the column counter restarts at 0 and
         * is bumped to 1 by the increment at the end of this iteration. */
        if(current == L'\n')
        {
            line_number++;
            char_number = 0;
        }

        if(iswalpha(current))
        {
            /* parse_word() receives pointers to the position counters. */
            parse_word(&index, &char_number, &line_number,
                       vector_content(input), dict, verbose);
        }
        else
        {
            wprintf(L"%lc", current);
        }

        index++;
        char_number++;
    }

    vector_done(input);
    dictionary_done(dict);
    return 0;
}
char *all_tests() { mu_suite_start(); char *path = dictionary_fullpath("data/kata-dasar.txt"); dictionary_load(path); free(path); mu_run_test(test_stem_singular_word_does_not_need_stemming); mu_run_test(test_stem_singular_word_returns_original_word_when_cannot_stem); mu_run_test(test_stem_singular_word_removes_suffixes); mu_run_test(test_stem_singular_word_removes_plain_prefixes); mu_run_test(test_stem_singular_word_removes_complex_prefixes_1); mu_run_test(test_stem_singular_word_removes_complex_prefixes_2); mu_run_test(test_stem_singular_word_removes_complex_prefixes_3); mu_run_test(test_stem_singular_word_removes_complex_prefixes_4); mu_run_test(test_stem_singular_word_removes_complex_prefixes_5); mu_run_test(test_stem_singular_word_removes_complex_prefixes_6); mu_run_test(test_stem_singular_word_removes_complex_prefixes_7); mu_run_test(test_stem_singular_word_removes_complex_prefixes_8); mu_run_test(test_stem_singular_word_removes_complex_prefixes_9); mu_run_test(test_stem_singular_word_removes_complex_prefixes_10); mu_run_test(test_stem_singular_word_removes_complex_prefixes_11); mu_run_test(test_stem_singular_word_removes_complex_prefixes_12); mu_run_test(test_stem_singular_word_removes_complex_prefixes_13); mu_run_test(test_stem_singular_word_removes_complex_prefixes_14); mu_run_test(test_stem_singular_word_removes_complex_prefixes_15); mu_run_test(test_stem_singular_word_removes_complex_prefixes_16); mu_run_test(test_stem_singular_word_removes_complex_prefixes_17); mu_run_test(test_stem_singular_word_removes_complex_prefixes_18); mu_run_test(test_stem_singular_word_removes_complex_prefixes_19); mu_run_test(test_stem_singular_word_removes_complex_prefixes_20); mu_run_test(test_stem_singular_word_uses_precedence_adjustment); mu_run_test(test_stem_singular_word_uses_precedence_adjustment_2); return NULL; }
/*
 * Command-line driver: compresses ("e") or decompresses ("d") a stream.
 *
 * After cr_process_arguments() has run, argv[1] selects the mode, argv[2]
 * (optional) names the source file and argv[3] (optional) the destination
 * file. Without them stdin and stdout are used. Progress and statistics are
 * printed to stderr.
 *
 * Returns 0 on success and -1 when argument processing fails, the mode is
 * not "e"/"d", a file cannot be opened, the magic check fails, or a stream
 * error is detected.
 *
 * NOTE(review): the early `return -1` paths after ferror()/fopen() failures
 * leave src_file/dst_file open and ib/ob undestroyed.
 */
int cr_main(int argc, char** argv) {
    /* Per-block header, written and read back as raw bytes. */
    struct {
        uint32_t m_size;
        uint8_t  m_filt;
        uint8_t  m_prec;
    } __attribute__((packed)) block_header;

    const char* src_name = "<stdin>";
    const char* dst_name = "<stdout>";
    FILE* src_file;
    FILE* dst_file;
    data_block_t ib = INITIAL_BLOCK;
    data_block_t ob = INITIAL_BLOCK;
    data_block_t* xb;
    data_block_t* yb;
    uint32_t src_size;
    uint32_t dst_size;
    int filt = 0;
    int enc;
    data_block_t dic_xb = INITIAL_BLOCK;
    data_block_t dic_yb = INITIAL_BLOCK;
    int nword;

    struct timeval time_start;
    struct timeval time_end;
    double cost_time;

    gettimeofday(&time_start, NULL);
    src_file = stdin;
    dst_file = stdout;

#if defined(_WIN32) || defined(_WIN64)
    /* we need to set stdin/stdout to binary mode under windows */
    setmode(fileno(stdin), O_BINARY);
    setmode(fileno(stdout), O_BINARY);
#endif

    /* reset global models for compressing/decompressing */
    reset_models();

    /* process arguments */
    /* argc is replaced by the value cr_process_arguments() returns; 0 is
     * treated as failure. */
    if((argc = cr_process_arguments(argc, argv)) == 0) {
        return -1;
    }

    /* start! */
    fprintf(stderr, "%s\n", cr_start_info);

    if(argc >=2 && argc <= 4 && strcmp(argv[1], "e") == 0) { /* encode */
        enc = 1;
        if(argc >= 3) src_name = argv[2], src_file = fopen(src_name, "rb");
        if(argc >= 4) dst_name = argv[3], dst_file = fopen(dst_name, "wb");

        if(src_file == stdin) { /* copy input data to temporary file, since stdin doesn't support rewind() */
            data_block_reserve(&ib, 1048576);
            /* NOTE(review): the result of tmpfile() is used by fwrite()
             * below without a NULL check. */
            src_file = tmpfile();

            while((ib.m_size = fread(ib.m_data, 1, ib.m_capacity, stdin)) > 0) {
                fwrite(ib.m_data, 1, ib.m_size, src_file);
            }
            data_block_destroy(&ib);
            rewind(src_file);
            ib = INITIAL_BLOCK;
        }

        if(src_file != NULL && dst_file != NULL) {
            write_magic(dst_file);
            fprintf(stderr, "compressing %s to %s, block_size = %uMB...\n", src_name, dst_name, cr_split_size / 1048576);

            /* build static dictionary */
            /* The source is read once by dicpick() and rewound so the block
             * loop below starts from the beginning again. */
            fprintf(stderr, "%s\n", "-> building static dictionary...");
            dicpick(src_file, &dic_xb);
            rewind(src_file);
            nword = dictionary_load((char*)dic_xb.m_data, 1);

            /* encode static dictionary */
            dic_lcp_encode(&dic_xb);
            lzencode(&dic_xb, &dic_yb, 0);
            reset_models();
            fprintf(stderr, "added %d words to dictionary, compressed size = %u bytes\n", nword, dic_yb.m_size);

            /* write static dictionary to dst_file */
            /* Layout: raw m_size field first, then m_size bytes of data. */
            fwrite(&dic_yb.m_size, sizeof(dic_yb.m_size), 1, dst_file);
            fwrite( dic_yb.m_data, 1, dic_yb.m_size, dst_file);
            data_block_destroy(&dic_xb);
            data_block_destroy(&dic_yb);

            while(!ferror(src_file) && !ferror(dst_file) && !feof(src_file)) {
                xb = &ib;
                yb = &ob;
                data_block_resize(xb, cr_split_size);

                /* read blocks */
                xb->m_size = fread(xb->m_data, 1, cr_split_size, src_file);

                /* precompress with filters */
                if(cr_filt_enable) {
                    filt = filter_inplace(xb->m_data, xb->m_size, FILTER_ENC);
                }

                /* encode */
                data_block_resize(yb, 0);
                dictionary_encode(xb, yb);
                /* Unless cr_prec_enable is set, the dictionary-encoded data
                 * (now reachable through xb after the swap) is additionally
                 * passed through lzencode() into yb. */
                if(!cr_prec_enable) {
                    swap_xyblock(&xb, &yb);
                    data_block_resize(yb, 0);
                    lzencode(xb, yb, !ferror(stderr));
                }

                /* write blocks */
                /* Empty output blocks are skipped entirely (no header). */
                if(yb->m_size > 0) {
                    block_header.m_size = yb->m_size;
                    block_header.m_filt = filt;
                    block_header.m_prec = cr_prec_enable;
                    fwrite(&block_header, sizeof(block_header), 1, dst_file);
                    fwrite(yb->m_data, 1, yb->m_size, dst_file);
                }
            }
            if(ferror(src_file) || ferror(dst_file)) {
                perror("ferror()");
                return -1;
            }
        } else {
            perror("fopen()");
            return -1;
        }
        /* Sizes for the statistics are the final stream positions. */
        src_size = ftell(src_file);
        dst_size = ftell(dst_file);
        fclose(src_file);
        fclose(dst_file);

    } else if(argc >= 2 && argc <= 4 && strcmp(argv[1], "d") == 0) { /* decode */
        enc = 0;
        if(argc >= 3) src_name = argv[2], src_file = fopen(src_name, "rb");
        if(argc >= 4) dst_name = argv[3], dst_file = fopen(dst_name, "wb");

        if(src_file == stdin) { /* copy input data to temporary file, since stdin doesn't support rewind() */
            data_block_reserve(&ib, 1048576);
            /* NOTE(review): the result of tmpfile() is used by fwrite()
             * below without a NULL check. */
            src_file = tmpfile();

            while((ib.m_size = fread(ib.m_data, 1, ib.m_capacity, stdin)) > 0) {
                fwrite(ib.m_data, 1, ib.m_size, src_file);
            }
            data_block_destroy(&ib);
            rewind(src_file);
            ib = INITIAL_BLOCK;
        }

        if(src_file != NULL && dst_file != NULL) {
            if(!check_magic(src_file)) {
                fprintf(stderr, "%s\n", "check_magic() failed.");
                fclose(src_file);
                fclose(dst_file);
                return -1;
            }
            fprintf(stderr, "decompressing %s to %s...\n", src_name, dst_name);

            /* decode static dictionary */
            fprintf(stderr, "%s\n", "-> decoding static dictionary...");

            /* read size of static dictionary from src_file */
            /* NOTE(review): the return values of this fread() and the next
             * one are not checked. */
            fread(&dic_yb.m_size, sizeof(dic_yb.m_size), 1, src_file);

            /* read static dictionary from src_file */
            data_block_resize(&dic_yb, dic_yb.m_size);
            fread(dic_yb.m_data, 1, dic_yb.m_size, src_file);

            /* decode static dictionary */
            /* Mirrors the encoder in reverse order: lzdecode(), then
             * dic_lcp_decode(), then dictionary_load(). */
            lzdecode(&dic_yb, &dic_xb, 0);
            reset_models();
            dic_lcp_decode(&dic_xb);
            dictionary_load((char*)dic_xb.m_data, 0);
            data_block_destroy(&dic_xb);
            data_block_destroy(&dic_yb);

            while(!ferror(src_file) && !ferror(dst_file) && !feof(src_file)) {
                xb = &ib;
                yb = &ob;

                /* read blocks */
                /* A missing or short header ends the loop. */
                if(fread(&block_header, sizeof(block_header), 1, src_file) != 1) {
                    break;
                }
                /* The fread() below uses yb->m_size as its length;
                 * presumably data_block_resize() has just set it to
                 * block_header.m_size -- confirm. */
                data_block_resize(yb, block_header.m_size);
                yb->m_size = fread(yb->m_data, 1, yb->m_size, src_file);

                /* decode */
                /* When the header's m_prec is 0 the payload is passed
                 * through lzdecode() first; the swap makes yb refer to that
                 * output for dictionary_decode(). */
                if(!block_header.m_prec) {
                    data_block_resize(xb, 0);
                    lzdecode(yb, xb, !ferror(stderr));
                    swap_xyblock(&xb, &yb);
                }
                data_block_resize(xb, 0);
                dictionary_decode(yb, xb, dst_file);

                /* apply the filter in FILTER_DEC mode when the header's m_filt is set */
                if(block_header.m_filt) {
                    filter_inplace(xb->m_data, xb->m_size, FILTER_DEC);
                }

                /* write blocks */
                if(xb->m_size > 0) {
                    fwrite(xb->m_data, 1, xb->m_size, dst_file);
                }
            }
            if(ferror(src_file) || ferror(dst_file)) {
                perror("ferror()");
                return -1;
            }
        } else {
            perror("fopen()");
            return -1;
        }
        /* Sizes for the statistics are the final stream positions. */
        src_size = ftell(src_file);
        dst_size = ftell(dst_file);
        fclose(src_file);
        fclose(dst_file);

    } else {
        /* bad argument! */
        fprintf(stderr, "%s\n", cr_usage_info);
        return -1;
    }

    data_block_destroy(&ib);
    data_block_destroy(&ob);

    /* Elapsed wall-clock time in seconds. */
    gettimeofday(&time_end, NULL);
    cost_time = (time_end.tv_sec - time_start.tv_sec) + (time_end.tv_usec - time_start.tv_usec) / 1000000.0;

    fprintf(stderr, "%u bytes => %u bytes\n\n", src_size, dst_size);
    /* NOTE(review): in both speed lines the size is divided by 1048576 as
     * an integer before the division by cost_time, so the MB figure is
     * truncated to a whole number first. */
    if(enc) {
        fprintf(stderr, "encode-speed:   %.3lf MB/s\n", src_size / 1048576 / cost_time);
        fprintf(stderr, "cost-time:      %.3lf s\n",    cost_time);
        fprintf(stderr, "compress-ratio: %.3lf\n",      (double)dst_size / src_size);
        fprintf(stderr, "bpb:            %.3lf\n",      (double)dst_size / src_size * 8);
    } else {
        fprintf(stderr, "decode-speed:   %.3lf MB/s\n", dst_size / 1048576 / cost_time);
        fprintf(stderr, "cost-time:      %.3lf s\n",    cost_time);
        fprintf(stderr, "compress-ratio: %.3lf\n",      (double)src_size / dst_size);
        fprintf(stderr, "bpb:            %.3lf\n",      (double)src_size / dst_size * 8);
    }
    return 0;
}
char *all_tests() { mu_suite_start(); char *path = dictionary_fullpath("data/kata-dasar.txt"); dictionary_load(path); free(path); mu_run_test(test_remove_plain_prefix_returns_0_if_word_notin_dictionary) mu_run_test(test_remove_plain_prefix_di); mu_run_test(test_remove_plain_prefix_ke); mu_run_test(test_remove_plain_prefix_se); mu_run_test(test_remove_complex_prefix_rule1_a); mu_run_test(test_remove_complex_prefix_rule1_b); mu_run_test(test_remove_complex_prefix_rule1_a_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule2); mu_run_test(test_remove_complex_prefix_rule2_excludes_er); mu_run_test(test_remove_complex_prefix_rule2_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule3_only_includes_er); mu_run_test(test_remove_complex_prefix_rule3_only_includes_er_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule3_only_includes_er_not_stemmed); mu_run_test(test_remove_complex_prefix_rule4); mu_run_test(test_remove_complex_prefix_rule4_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule4_not_stemmed); mu_run_test(test_remove_complex_prefix_rule5); mu_run_test(test_remove_complex_prefix_rule5_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule5_not_stemmed); mu_run_test(test_remove_complex_prefix_rule6a); mu_run_test(test_remove_complex_prefix_rule6a_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule6a_not_stemmed); mu_run_test(test_remove_complex_prefix_rule6b); mu_run_test(test_remove_complex_prefix_rule6b_not_stemmed); mu_run_test(test_remove_complex_prefix_rule7); mu_run_test(test_remove_complex_prefix_rule7_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule8); mu_run_test(test_remove_complex_prefix_rule8_excludes_er); mu_run_test(test_remove_complex_prefix_rule8_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule9); mu_run_test(test_remove_complex_prefix_rule9_partially_stemmed); mu_run_test(test_remove_complex_prefix_rule10_l); 
mu_run_test(test_remove_complex_prefix_rule10_r); mu_run_test(test_remove_complex_prefix_rule10_w); mu_run_test(test_remove_complex_prefix_rule10_y); mu_run_test(test_remove_complex_prefix_rule11_f); mu_run_test(test_remove_complex_prefix_rule11_b); mu_run_test(test_remove_complex_prefix_rule11_v); mu_run_test(test_remove_complex_prefix_rule11_unstemmable); mu_run_test(test_remove_complex_prefix_rule12); mu_run_test(test_remove_complex_prefix_rule13a); mu_run_test(test_remove_complex_prefix_rule13b); mu_run_test(test_remove_complex_prefix_rule14_c); mu_run_test(test_remove_complex_prefix_rule14_d); mu_run_test(test_remove_complex_prefix_rule14_j); mu_run_test(test_remove_complex_prefix_rule14_s); mu_run_test(test_remove_complex_prefix_rule14_t); mu_run_test(test_remove_complex_prefix_rule14_z); mu_run_test(test_remove_complex_prefix_rule15a); mu_run_test(test_remove_complex_prefix_rule15b); mu_run_test(test_remove_complex_prefix_rule16_g); mu_run_test(test_remove_complex_prefix_rule16_h); mu_run_test(test_remove_complex_prefix_rule16_q); mu_run_test(test_remove_complex_prefix_rule16_k); mu_run_test(test_remove_complex_prefix_rule17a); mu_run_test(test_remove_complex_prefix_rule17b); mu_run_test(test_remove_complex_prefix_rule17c); mu_run_test(test_remove_complex_prefix_rule17d); mu_run_test(test_remove_complex_prefix_rule18a); mu_run_test(test_remove_complex_prefix_rule18b); mu_run_test(test_remove_complex_prefix_rule19_1); mu_run_test(test_remove_complex_prefix_rule19_2); mu_run_test(test_remove_complex_prefix_rule20_1); mu_run_test(test_remove_complex_prefix_rule20_2); mu_run_test(test_remove_prefixes_when_partially_stemmed); mu_run_test(test_remove_prefixes_runs_3_times); return NULL; }