示例#1
0
/**
 * @brief Opens a dictionary file and returns the respective Dictionary object
 *
 * @param name name of the file containing the dictionary
 * @return the Dictionary object or NULL in case of error.
 */
Dictionary* dictionary_open(const char *name) 
{
    Dictionary *dic;
    FILE *gzf;
    gzf = gzopen(name, "rb");
    if (!gzf) report_error("error opening file %s for reading.\n", name);

    dic = dictionary_load(gzf);

    gzclose(gzf);
    return dic;
}
示例#2
0
/**
  Tests loading of the dictionary tree.
  @param state Test environment.
  */
static void dictionary_load_test(void** state)
{
    /* Queue the serialized dictionary on the mocked input. */
    push_word_to_io_mock(L"ciupagą*^^^^^^^\n13\na*b*3*2\n");

    struct dictionary *loaded = dictionary_load(stdin);

    /* Presumably discards whatever the loader left unread in the mock. */
    pop_remaining_chars();

    assert_non_null(loaded);
    assert_true(dictionary_find(loaded, L"ciupagą"));
    assert_int_equal(dictionary_hints_max_cost(loaded, 2), 13);

    dictionary_done(loaded);
}
示例#3
0
//TODO - create a test with berlarikah, to test return suffix
/*
 * Runs the plural-word test suite.
 *
 * Loads the root-word dictionary from data/kata-dasar.txt and then runs each
 * plural test in order.
 *
 * Returns NULL once every mu_run_test() invocation has been passed.
 */
char *all_tests()
{
  mu_suite_start();

  /* Keep the path so it can be released after loading, as the other
   * all_tests() suites in this file do (assumes dictionary_fullpath()
   * returns heap memory owned by the caller). */
  char *path = dictionary_fullpath("data/kata-dasar.txt");
  dictionary_load(path);
  free(path);

  mu_run_test(test_is_plural);
  mu_run_test(test_plural_parts);
  mu_run_test(test_stem_plural_word_when_both_words_are_root_words_and_the_same);
  mu_run_test(test_stem_plural_word_when_one_word_has_suffixes);

  return NULL;
}
示例#4
0
/**
 * Loads a dictionary from a path.
 * On a system error, the system error description is printed as well.
 * @param [in] path Path.
 * @param [out] dict Pointer to the pointer under which the pointer to the
 * loaded dictionary is stored. It is left untouched when the file cannot
 * be opened.
 * The dictionary must be released by the caller.
 * @return 0 when there is no error, a non-zero value on error.
 */
int load_dictionary(const char * path, struct dictionary ** dict)
{
    FILE * file = fopen(path, "r");
    if (NULL == file)
    {
        /* Capture errno right away: error() performs I/O of its own and may
         * overwrite it before we get to return it. */
        int saved_errno = errno;
        error(0, saved_errno, "Error reading dict from path %s", path);
        /* Guarantee the documented non-zero result even if errno was 0. */
        return saved_errno != 0 ? saved_errno : -1;
    }
    *dict = dictionary_load(file);
    fclose(file);
    if (NULL == *dict)
        return -1;
    return 0;
}
示例#5
0
/**
  Main function.
  Entry point of the dict-check program.

  Accepts either `<dictionary-file>` or `-v <dictionary-file>`. The
  dictionary is loaded from the last argument; the text returned by
  read_input() is then walked character by character: non-alphabetic
  characters are echoed, alphabetic ones are handed to parse_word().
 */
int main(int argc, const char **argv)
{
	setlocale(LC_ALL, "pl_PL.UTF-8");

	/* NOTE(review): usage() is presumably terminating; execution continues
	   below if it is not. */
	if(argc < 2 || argc > 3)
	{
		fwprintf(stderr, L"Błędna liczba argumentów!\n");
		usage();
	}
	/* With two arguments, the first one has to be exactly "-v". */
	if(argc == 3 && !(argv[1][0] == '-' && argv[1][1] == 'v' && argv[1][2] == '\0'))
	{
		fwprintf(stderr, L"Błędny argument!\n");
		usage();
	}

	const char *dict_path = argv[argc - 1];
	bool v_option = (argc == 3);

	FILE *dict_file = fopen(dict_path, "r");
	if(dict_file == NULL)
	{
		fwprintf(stderr, L"Nie udało się załadować pliku %s!\n", dict_path);
		usage();
	}

	struct dictionary *dict = dictionary_load(dict_file);
	fclose(dict_file);
	if(!dict)
	{
		fwprintf(stderr, L"Nie udało się załadować pliku %s!\n", dict_path);
		usage();
	}

	vector *buffer = read_input();
	int line_number = 1;
	int char_number = 1;
	int index = 0;
	while(index < vector_size(buffer))
	{
		wchar_t c = ((wchar_t *)vector_content(buffer))[index];

		/* A newline starts the next line; the increment at the end of the
		   iteration brings char_number back to 1. */
		if(c == L'\n')
		{
			line_number++;
			char_number = 0;
		}

		if(iswalpha(c))
		{
			/* parse_word() receives pointers to the position counters. */
			parse_word(&index, &char_number, &line_number, vector_content(buffer), dict, v_option);
		}
		else
		{
			wprintf(L"%lc", c);
		}

		index++;
		char_number++;
	}

	vector_done(buffer);
	dictionary_done(dict);
	return 0;
}
示例#6
0
/*
 * Runs the singular-word stemming test suite.
 *
 * Loads the root-word dictionary from data/kata-dasar.txt, releases the path
 * string, and then runs each test in order.
 *
 * Returns NULL once every mu_run_test() invocation has been passed.
 */
char *all_tests()
{
  mu_suite_start();

  /* Load the root-word dictionary used by every test below. */
  char *dictionary_path = dictionary_fullpath("data/kata-dasar.txt");
  dictionary_load(dictionary_path);
  free(dictionary_path);

  /* Words that need no stemming, cannot be stemmed, or only carry suffixes. */
  mu_run_test(test_stem_singular_word_does_not_need_stemming);
  mu_run_test(test_stem_singular_word_returns_original_word_when_cannot_stem);
  mu_run_test(test_stem_singular_word_removes_suffixes);

  /* Plain prefixes. */
  mu_run_test(test_stem_singular_word_removes_plain_prefixes);

  /* Complex prefixes, cases 1 to 20. */
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_1);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_2);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_3);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_4);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_5);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_6);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_7);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_8);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_9);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_10);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_11);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_12);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_13);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_14);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_15);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_16);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_17);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_18);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_19);
  mu_run_test(test_stem_singular_word_removes_complex_prefixes_20);

  /* Precedence adjustment. */
  mu_run_test(test_stem_singular_word_uses_precedence_adjustment);
  mu_run_test(test_stem_singular_word_uses_precedence_adjustment_2);

  return NULL;
}
示例#7
0
文件: main.c 项目: raedwulf/comprox
/*
 * Command-line driver: compresses ("e") or decompresses ("d") a stream.
 *
 * After cr_process_arguments() has processed argv, the expected form is
 *     <prog> e|d [src [dst]]
 * A missing src/dst defaults to stdin/stdout. When the source is stdin it is
 * first copied into a temporary file, because the input must be rewindable.
 *
 * Stream layout produced by the encoder (and consumed by the decoder):
 *   1. the magic written by write_magic();
 *   2. the size of the compressed static dictionary, followed by that many
 *      bytes of dictionary data;
 *   3. a sequence of blocks, each a packed block_header followed by
 *      block_header.m_size payload bytes.
 *
 * Progress and statistics are printed to stderr.
 *
 * @param argc argument count
 * @param argv argument vector
 * @return 0 on success, -1 on bad arguments or any I/O/format error.
 */
int cr_main(int argc, char** argv) {
    /* per-block header stored in front of every payload */
    struct {
        uint32_t m_size; /* payload size in bytes */
        uint8_t  m_filt; /* value returned by filter_inplace() when encoding */
        uint8_t  m_prec; /* copy of cr_prec_enable; non-zero skips the lz stage */
    } __attribute__((packed)) block_header;

    const char* src_name = "<stdin>";
    const char* dst_name = "<stdout>";
    FILE* src_file;
    FILE* dst_file;
    data_block_t ib = INITIAL_BLOCK;
    data_block_t ob = INITIAL_BLOCK;
    data_block_t* xb;
    data_block_t* yb;
    uint32_t src_size;
    uint32_t dst_size;
    int filt = 0;
    int enc;

    data_block_t dic_xb = INITIAL_BLOCK;
    data_block_t dic_yb = INITIAL_BLOCK;
    int nword;

    struct timeval time_start;
    struct timeval time_end;
    double cost_time;

    gettimeofday(&time_start, NULL);
    src_file = stdin;
    dst_file = stdout;

#if defined(_WIN32) || defined(_WIN64)
    /* we need to set stdin/stdout to binary mode under windows */
    setmode(fileno(stdin), O_BINARY);
    setmode(fileno(stdout), O_BINARY);
#endif

    /* reset global models for compressing/decompressing */
    reset_models();

    /* process arguments */
    if((argc = cr_process_arguments(argc, argv)) == 0) {
        return -1;
    }

    /* start! */
    fprintf(stderr, "%s\n", cr_start_info);
    if(argc >=2 && argc <= 4 && strcmp(argv[1], "e") == 0) { /* encode */
        enc = 1;
        if(argc >= 3) src_name = argv[2], src_file = fopen(src_name, "rb");
        if(argc >= 4) dst_name = argv[3], dst_file = fopen(dst_name, "wb");
        if(src_file == stdin) { /* copy input data to temporary file, since stdin doesn't support rewind() */
            data_block_reserve(&ib, 1048576);
            src_file = tmpfile();
            /* tmpfile() may fail; the NULL is then reported by the check below */
            if(src_file != NULL) {
                while((ib.m_size = fread(ib.m_data, 1, ib.m_capacity, stdin)) > 0) {
                    fwrite(ib.m_data, 1, ib.m_size, src_file);
                }
                rewind(src_file);
            }
            data_block_destroy(&ib);
            ib = INITIAL_BLOCK;
        }

        if(src_file != NULL && dst_file != NULL) {
            write_magic(dst_file);
            fprintf(stderr, "compressing %s to %s, block_size = %uMB...\n", src_name, dst_name, cr_split_size / 1048576);

            /* build static dictionary */
            fprintf(stderr, "%s\n", "-> building static dictionary...");
            dicpick(src_file, &dic_xb);
            rewind(src_file);
            nword = dictionary_load((char*)dic_xb.m_data, 1);

            /* encode static dictionary */
            dic_lcp_encode(&dic_xb);
            lzencode(&dic_xb, &dic_yb, 0);
            reset_models();
            fprintf(stderr, "added %d words to dictionary, compressed size = %u bytes\n", nword, dic_yb.m_size);

            /* write static dictionary to dst_file */
            fwrite(&dic_yb.m_size, sizeof(dic_yb.m_size), 1, dst_file);
            fwrite( dic_yb.m_data, 1, dic_yb.m_size, dst_file);
            data_block_destroy(&dic_xb);
            data_block_destroy(&dic_yb);

            while(!ferror(src_file) && !ferror(dst_file) && !feof(src_file)) {
                xb = &ib;
                yb = &ob;
                data_block_resize(xb, cr_split_size);

                /* read blocks */
                xb->m_size = fread(xb->m_data, 1, cr_split_size, src_file);

                /* precompress with filters */
                if(cr_filt_enable) {
                    filt = filter_inplace(xb->m_data, xb->m_size, FILTER_ENC);
                }

                /* encode */
                data_block_resize(yb, 0);
                dictionary_encode(xb, yb);

                if(!cr_prec_enable) {
                    swap_xyblock(&xb, &yb);
                    data_block_resize(yb, 0);
                    lzencode(xb, yb, !ferror(stderr));
                }

                /* write blocks */
                if(yb->m_size > 0) {
                    block_header.m_size = yb->m_size;
                    block_header.m_filt = filt;
                    block_header.m_prec = cr_prec_enable;

                    fwrite(&block_header, sizeof(block_header), 1, dst_file);
                    fwrite(yb->m_data, 1, yb->m_size, dst_file);
                }
            }
            if(ferror(src_file) || ferror(dst_file)) {
                perror("ferror()");
                return -1;
            }
        } else {
            perror("fopen()");
            return -1;
        }
        src_size = ftell(src_file);
        dst_size = ftell(dst_file);
        fclose(src_file);
        fclose(dst_file);

    } else if(argc >= 2 && argc <= 4 && strcmp(argv[1], "d") == 0) { /* decode */
        enc = 0;
        if(argc >= 3) src_name = argv[2], src_file = fopen(src_name, "rb");
        if(argc >= 4) dst_name = argv[3], dst_file = fopen(dst_name, "wb");

        if(src_file == stdin) { /* copy input data to temporary file, since stdin doesn't support rewind() */
            data_block_reserve(&ib, 1048576);
            src_file = tmpfile();
            /* tmpfile() may fail; the NULL is then reported by the check below */
            if(src_file != NULL) {
                while((ib.m_size = fread(ib.m_data, 1, ib.m_capacity, stdin)) > 0) {
                    fwrite(ib.m_data, 1, ib.m_size, src_file);
                }
                rewind(src_file);
            }
            data_block_destroy(&ib);
            ib = INITIAL_BLOCK;
        }
        if(src_file != NULL && dst_file != NULL) {
            if(!check_magic(src_file)) {
                fprintf(stderr, "%s\n", "check_magic() failed.");
                fclose(src_file);
                fclose(dst_file);
                return -1;
            }
            fprintf(stderr, "decompressing %s to %s...\n", src_name, dst_name);

            /* decode static dictionary */
            fprintf(stderr, "%s\n", "-> decoding static dictionary...");

            /* read size of static dictionary from src_file */
            if(fread(&dic_yb.m_size, sizeof(dic_yb.m_size), 1, src_file) != 1) {
                fprintf(stderr, "%s\n", "reading static dictionary size failed.");
                fclose(src_file);
                fclose(dst_file);
                return -1;
            }

            /* read static dictionary from src_file */
            data_block_resize(&dic_yb, dic_yb.m_size);
            if(fread(dic_yb.m_data, 1, dic_yb.m_size, src_file) != dic_yb.m_size) {
                /* truncated input: do not try to decode a partial dictionary */
                fprintf(stderr, "%s\n", "reading static dictionary failed.");
                data_block_destroy(&dic_yb);
                fclose(src_file);
                fclose(dst_file);
                return -1;
            }

            /* decode static dictionary */
            lzdecode(&dic_yb, &dic_xb, 0);
            reset_models();
            dic_lcp_decode(&dic_xb);

            dictionary_load((char*)dic_xb.m_data, 0);
            data_block_destroy(&dic_xb);
            data_block_destroy(&dic_yb);

            while(!ferror(src_file) && !ferror(dst_file) && !feof(src_file)) {
                xb = &ib;
                yb = &ob;

                /* read blocks */
                if(fread(&block_header, sizeof(block_header), 1, src_file) != 1) {
                    break;
                }
                data_block_resize(yb, block_header.m_size);
                yb->m_size = fread(yb->m_data, 1, yb->m_size, src_file);

                /* decode */
                if(!block_header.m_prec) {
                    data_block_resize(xb, 0);
                    lzdecode(yb, xb, !ferror(stderr));
                    swap_xyblock(&xb, &yb);
                }
                data_block_resize(xb, 0);
                dictionary_decode(yb, xb, dst_file);

                /* undo the precompression filter */
                if(block_header.m_filt) {
                    filter_inplace(xb->m_data, xb->m_size, FILTER_DEC);
                }

                /* write blocks */
                if(xb->m_size > 0) {
                    fwrite(xb->m_data, 1, xb->m_size, dst_file);
                }
            }
            if(ferror(src_file) || ferror(dst_file)) {
                perror("ferror()");
                return -1;
            }
        } else {
            perror("fopen()");
            return -1;
        }
        src_size = ftell(src_file);
        dst_size = ftell(dst_file);
        fclose(src_file);
        fclose(dst_file);

    } else {
        /* bad argument! */
        fprintf(stderr, "%s\n", cr_usage_info);
        return -1;
    }

    data_block_destroy(&ib);
    data_block_destroy(&ob);

    gettimeofday(&time_end, NULL);
    cost_time = (time_end.tv_sec - time_start.tv_sec) + (time_end.tv_usec - time_start.tv_usec) / 1000000.0;

    fprintf(stderr, "%u bytes => %u bytes\n\n", src_size, dst_size);
    /* divide by 1048576.0 so the MB figure is not truncated by integer division */
    if(enc) {
        fprintf(stderr, "encode-speed:   %.3lf MB/s\n",  src_size / 1048576.0 / cost_time);
        fprintf(stderr, "cost-time:      %.3lf s\n",     cost_time);
        fprintf(stderr, "compress-ratio: %.3lf\n",       (double)dst_size / src_size);
        fprintf(stderr, "bpb:            %.3lf\n",       (double)dst_size / src_size * 8);
    } else {
        fprintf(stderr, "decode-speed:   %.3lf MB/s\n",  dst_size / 1048576.0 / cost_time);
        fprintf(stderr, "cost-time:      %.3lf s\n",     cost_time);
        fprintf(stderr, "compress-ratio: %.3lf\n",       (double)src_size / dst_size);
        fprintf(stderr, "bpb:            %.3lf\n",       (double)src_size / dst_size * 8);
    }
    return 0;
}
char *all_tests()
{
  mu_suite_start();

  char *path = dictionary_fullpath("data/kata-dasar.txt");
  dictionary_load(path);
  free(path);


  mu_run_test(test_remove_plain_prefix_returns_0_if_word_notin_dictionary)
  mu_run_test(test_remove_plain_prefix_di);
  mu_run_test(test_remove_plain_prefix_ke);
  mu_run_test(test_remove_plain_prefix_se);
  mu_run_test(test_remove_complex_prefix_rule1_a);
  mu_run_test(test_remove_complex_prefix_rule1_b);
  mu_run_test(test_remove_complex_prefix_rule1_a_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule2);
  mu_run_test(test_remove_complex_prefix_rule2_excludes_er);
  mu_run_test(test_remove_complex_prefix_rule2_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule3_only_includes_er);
  mu_run_test(test_remove_complex_prefix_rule3_only_includes_er_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule3_only_includes_er_not_stemmed);
  mu_run_test(test_remove_complex_prefix_rule4);
  mu_run_test(test_remove_complex_prefix_rule4_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule4_not_stemmed);
  mu_run_test(test_remove_complex_prefix_rule5);
  mu_run_test(test_remove_complex_prefix_rule5_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule5_not_stemmed);
  mu_run_test(test_remove_complex_prefix_rule6a);
  mu_run_test(test_remove_complex_prefix_rule6a_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule6a_not_stemmed);
  mu_run_test(test_remove_complex_prefix_rule6b);
  mu_run_test(test_remove_complex_prefix_rule6b_not_stemmed);
  mu_run_test(test_remove_complex_prefix_rule7);
  mu_run_test(test_remove_complex_prefix_rule7_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule8);
  mu_run_test(test_remove_complex_prefix_rule8_excludes_er);
  mu_run_test(test_remove_complex_prefix_rule8_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule9);
  mu_run_test(test_remove_complex_prefix_rule9_partially_stemmed);
  mu_run_test(test_remove_complex_prefix_rule10_l);
  mu_run_test(test_remove_complex_prefix_rule10_r);
  mu_run_test(test_remove_complex_prefix_rule10_w);
  mu_run_test(test_remove_complex_prefix_rule10_y);
  mu_run_test(test_remove_complex_prefix_rule11_f);
  mu_run_test(test_remove_complex_prefix_rule11_b);
  mu_run_test(test_remove_complex_prefix_rule11_v);
  mu_run_test(test_remove_complex_prefix_rule11_unstemmable);
  mu_run_test(test_remove_complex_prefix_rule12);
  mu_run_test(test_remove_complex_prefix_rule13a);
  mu_run_test(test_remove_complex_prefix_rule13b);
  mu_run_test(test_remove_complex_prefix_rule14_c);
  mu_run_test(test_remove_complex_prefix_rule14_d);
  mu_run_test(test_remove_complex_prefix_rule14_j);
  mu_run_test(test_remove_complex_prefix_rule14_s);
  mu_run_test(test_remove_complex_prefix_rule14_t);
  mu_run_test(test_remove_complex_prefix_rule14_z);
  mu_run_test(test_remove_complex_prefix_rule15a);
  mu_run_test(test_remove_complex_prefix_rule15b);
  mu_run_test(test_remove_complex_prefix_rule16_g);
  mu_run_test(test_remove_complex_prefix_rule16_h);
  mu_run_test(test_remove_complex_prefix_rule16_q);
  mu_run_test(test_remove_complex_prefix_rule16_k);
  mu_run_test(test_remove_complex_prefix_rule17a);
  mu_run_test(test_remove_complex_prefix_rule17b);
  mu_run_test(test_remove_complex_prefix_rule17c);
  mu_run_test(test_remove_complex_prefix_rule17d);
  mu_run_test(test_remove_complex_prefix_rule18a);
  mu_run_test(test_remove_complex_prefix_rule18b);
  mu_run_test(test_remove_complex_prefix_rule19_1);
  mu_run_test(test_remove_complex_prefix_rule19_2);
  mu_run_test(test_remove_complex_prefix_rule20_1);
  mu_run_test(test_remove_complex_prefix_rule20_2);
  mu_run_test(test_remove_prefixes_when_partially_stemmed);
  mu_run_test(test_remove_prefixes_runs_3_times);
  return NULL;
}