示例#1
0
/* Attaches lemmatizer data from pointers. Safe to do turglem_close() after this.
 *
 * A new handle is allocated and its three parts are attached in order:
 * the dictionary automaton, the prediction automaton and the paradigms.
 *
 * Returns the new handle, or 0 if the allocation or any attach step fails.
 * On failure every automaton attached so far is closed and the handle is
 * freed before returning.
*/
turglem turglem_attach(const void *ptr_dic_autom_nodes, const void *ptr_dic_autom_links, const void *ptr_predict_autom_nodes, const void *ptr_predict_autom_links, const void *ptr_paradigms)
{
	turglem handle = (turglem)malloc(sizeof(struct turglem_struct));

	if (!handle)
	{
		return 0;
	}

	handle->words = MAFSA_automaton_attach(ptr_dic_autom_nodes, ptr_dic_autom_links, 0);
	if (handle->words)
	{
		handle->prediction = MAFSA_automaton_attach(ptr_predict_autom_nodes, ptr_predict_autom_links, 0);
		if (handle->prediction)
		{
			handle->paradigms = turglem_paradigms_attach(ptr_paradigms, 0);
			if (handle->paradigms)
			{
				return handle;
			}
		}

		/* A later step failed: the dictionary automaton is always live here,
		 * the prediction automaton only if its own attach succeeded. */
		MAFSA_automaton_close(handle->words);
		if (handle->prediction)
		{
			MAFSA_automaton_close(handle->prediction);
		}
	}

	free(handle);
	return 0;
}
示例#2
0
/* Releases a lemmatizer handle: closes the dictionary automaton, the
 * prediction automaton and the paradigms, then frees the handle itself.
 *
 * A null handle is ignored (like free(0)), so the result of a failed
 * load/attach can be passed here without an extra check.
 * The handle must not be used after this call.
*/
void turglem_close(turglem lem)
{
	if (0 == lem) return;

	MAFSA_automaton_close(lem->words);
	MAFSA_automaton_close(lem->prediction);
	turglem_paradigms_close(lem->paradigms);
	free(lem);
}
示例#3
0
/* Loads all lemmatizer data from files: main automaton, prediction automaton, paradigms.
 *
 * fname_dic_autom      file with the main (dictionary) automaton
 * fname_predict_autom  file with the prediction automaton
 * fname_paradigms      file with the paradigms
 * err_no               optional; reset to 0 on entry and then handed to each loader
 * err_what             optional; 0 on success, TURGLEM_ERROR_NOMEM if the handle
 *                      cannot be allocated, otherwise the TURGLEM_ERROR_* code of
 *                      the loading step that failed
 *
 * Returns the new handle, or 0 on failure. On failure everything loaded so
 * far is closed and the handle is freed.
 *
 * NOTE(review): an earlier comment stated that the prediction file name may be 0.
 * This code always passes it to the loader and fails when the loader returns 0,
 * so a null name only works if the loader itself accepts it - confirm.
*/
turglem turglem_load(const char *fname_dic_autom, const char *fname_predict_autom, const char *fname_paradigms, int *err_no, int *err_what)
{
	turglem handle;
	int what_sink;

	if (err_no) *err_no = 0;

	/* Redirect a missing err_what to a local so the stage markers below
	 * can be written unconditionally. */
	if (!err_what) err_what = &what_sink;
	*err_what = 0;

	handle = (turglem)malloc(sizeof(struct turglem_struct));
	if (!handle)
	{
		*err_what = TURGLEM_ERROR_NOMEM;
		return 0;
	}

	/* Each stage code is set before its loader runs, so a failing step
	 * leaves its own code behind. */
	*err_what = TURGLEM_ERROR_DICTIONARY;
	handle->words = MAFSA_automaton_load_from_binary_file(fname_dic_autom, err_no);
	if (!handle->words) goto fail_handle;

	*err_what = TURGLEM_ERROR_PREDICTION;
	handle->prediction = MAFSA_automaton_load_from_binary_file(fname_predict_autom, err_no);
	if (!handle->prediction) goto fail_words;

	*err_what = TURGLEM_ERROR_PARADIGMS;
	handle->paradigms = turglem_paradigms_load_from_binary_file(fname_paradigms, err_no);
	if (!handle->paradigms) goto fail_automata;

	*err_what = 0;
	return handle;

fail_automata:
	MAFSA_automaton_close(handle->words);
	MAFSA_automaton_close(handle->prediction);
	free(handle);
	return 0;

fail_words:
	MAFSA_automaton_close(handle->words);
fail_handle:
	free(handle);
	return 0;
}
示例#4
0
/* Test driver: builds an automaton from the urls[] table, saves it to
 * SAVE_TO, loads it back, then searches every line read from stdin.
 *
 * For each line where the search result is not -1, the line is printed with
 * the reported segment highlighted in red. At end of input the hit/miss
 * counters and the accumulated insert and search times are printed.
 *
 * Returns 0 on success, 1 if the saved automaton cannot be loaded.
 */
int main(int argc, char *argv[])
{
    MAFSA::daciuk<MAX_LETTER + 1> dict;
    MAFSA_automaton ma;

    int i;
    int rc;

    uint32_t ex = 0, ok = 0;   /* lines with / without a search hit */
    uint32_t cs = 0, ss = 0;   /* microseconds spent inserting / searching */

    for (i = 0; urls[i]; ++i)
    {
        size_t sz;
        MAFSA_letter word [1024];

        struct timeval tvc1;
        struct timeval tvc2;

        sz = strlen((const char *) urls[i]);

        /* Refuse entries that would overrun the letter buffer. */
        if (sz > sizeof(word) / sizeof(word[0]))
        {
            fprintf(stderr, "Skipping entry %d: too long for the word buffer\n", i);
            continue;
        }

        /* Raw byte copy: assumes MAFSA_letter is byte-sized - TODO confirm. */
        memcpy(word, urls[i], sz);

        gettimeofday(&tvc1, NULL);
        dict.insert(word, sz);
        gettimeofday(&tvc2, NULL);

        cs += (tvc2.tv_sec - tvc1.tv_sec) * 1000000 + (tvc2.tv_usec - tvc1.tv_usec);
    }

    dict.save_to_file(SAVE_TO);
    ma = MAFSA_automaton_load_from_binary_file(SAVE_TO, NULL);
    if (!ma)
    {
        /* Without an automaton every search below would dereference null. */
        fprintf(stderr, "Failed to load automaton from %s\n", SAVE_TO);
        return 1;
    }

    for (;;)
    {
        MAFSA_letter outbuf [1024];
        char urlbuf [4096], *pos;
        size_t outsz = 0, urlsz = 0;

        struct timeval tvs1;
        struct timeval tvs2;

        if (NULL == fgets(urlbuf, 4096, stdin))
        {
            break;
        }

        /* Strip the line terminator (LF and/or CR). */
        if (NULL != (pos = strchr(urlbuf, '\n'))) *pos = 0;
        if (NULL != (pos = strchr(urlbuf, '\r'))) *pos = 0;

        urlsz = strlen(urlbuf);

        gettimeofday(&tvs1, NULL);
        rc = MAFSA_automaton_search(ma, (const MAFSA_letter *) urlbuf, urlsz, outbuf, 1024, &outsz);
        gettimeofday(&tvs2, NULL);

        ss += (tvs2.tv_sec - tvs1.tv_sec) * 1000000 + (tvs2.tv_usec - tvs1.tv_usec);

        if (-1 == rc)
        {
            ++ok;
        }
        else
        {
            /* Presumably rc is the offset of the hit inside urlbuf and outsz
             * its length - confirm against the search API. All three pieces
             * are clamped so that no read leaves urlbuf or outbuf. */
            size_t hit = outsz;
            size_t head = (size_t) rc;
            size_t tail;

            if (hit > sizeof(outbuf) / sizeof(outbuf[0])) hit = sizeof(outbuf) / sizeof(outbuf[0]);
            if (head > urlsz) head = urlsz;

            tail = head + hit;
            if (tail > urlsz) tail = urlsz;

            ++ex;

            /* Print the pieces directly with explicit lengths: no fixed-size
             * staging buffer to overflow, and outbuf needs no terminator. */
            printf("Filter (%03d .. %03u): %.*s\033[1;31m%.*s\033[0m%s\n",
                   rc, (unsigned int) (rc + outsz),
                   (int) head, urlbuf,
                   (int) hit, (const char *) outbuf,
                   urlbuf + tail);
        }
    }

    printf("\n");
    printf("Result: ex = %u, ok = %u\n", (unsigned int) ex, (unsigned int) ok);
    printf("TimeComple: %6u.%06u seconds\n", (unsigned int) (cs / 1000000), (unsigned int) (cs % 1000000));
    printf("TimeSearch: %6u.%06u seconds\n", (unsigned int) (ss / 1000000), (unsigned int) (ss % 1000000));

    MAFSA_automaton_close(ma);

    return 0;
}