Esempio n. 1
0
File: trie.c Progetto: kritik/trie
/*
 * Depth-first enumeration of every key reachable from `state`, collecting
 * [key, value] pairs.
 *
 * trie        - the trie being walked (not used directly by this function).
 * children    - Ruby array that receives one [key, data] tuple per key found.
 * state       - state to start from; it is only read and cloned, never advanced.
 * prefix      - NUL-terminated scratch buffer holding the key built so far.
 *               The caller must provide room for the longest key below `state`
 *               plus the terminator; this function cannot check the capacity.
 * prefix_size - number of bytes of `prefix` currently in use.
 *
 * Returns `children`.
 */
static VALUE walk_all_paths_with_values(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
	int c;

	for(c = 1; c < 256; c++) {
		if(!trie_state_is_walkable(state, c))
			continue;

		/* Walk on a clone so `state` stays valid for the remaining siblings. */
		TrieState *next_state = trie_state_clone(state);
		trie_state_walk(next_state, c);

		prefix[prefix_size] = (char)c;
		prefix[prefix_size + 1] = 0;

		if(trie_state_is_terminal(next_state)) {
			/* The stored data is read from the state reached by walking the terminator. */
			TrieState *end_state = trie_state_clone(next_state);
			trie_state_walk(end_state, '\0');

			/* rb_str_new2 copies the bytes, so the scratch buffer is passed
			 * directly; a temporary heap copy would never be released. */
			VALUE tuple = rb_ary_new();
			rb_ary_push(tuple, rb_str_new2(prefix));

			TrieData trie_data = trie_state_get_data(end_state);
			rb_ary_push(tuple, (VALUE)trie_data);
			rb_ary_push(children, tuple);

			trie_state_free(end_state);
		}

		walk_all_paths_with_values(trie, children, next_state, prefix, prefix_size + 1);

		/* Restore the buffer before trying the next sibling byte. */
		prefix[prefix_size] = 0;
		trie_state_free(next_state);
	}

	return children;
}
Esempio n. 2
0
File: trie.c Progetto: kritik/trie
/*
 * call-seq:
 *   children(prefix) -> [ key, ... ]
 *
 * Finds all keys in the Trie beginning with the given prefix.
 *
 * Returns an empty array when prefix is nil or when the trie cannot be
 * walked along the whole prefix. If the prefix itself is a key it is the
 * first element of the result.
 *
 * Keys are rebuilt in a fixed-size scratch buffer. When the prefix does not
 * leave room for at least one more byte plus the terminator, the keys below
 * it are not enumerated (only the prefix itself is reported, if it is a key)
 * rather than writing past the end of the buffer.
 *
 */
static VALUE rb_trie_children(VALUE self, VALUE prefix) {
	if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

	Trie *trie;
	Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
	TrieState *state = trie_root(trie);
	VALUE children = rb_ary_new();
	TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);

	/* Advance the state along the prefix, one byte at a time. */
	const TrieChar *iterator = char_prefix;
	while(*iterator != 0) {
		if(!trie_state_is_walkable(state, *iterator)) {
			/* No key starts with this prefix; release the state before leaving. */
			trie_state_free(state);
			return children;
		}
		trie_state_walk(state, *iterator);
		iterator++;
	}

	if(trie_state_is_terminal(state))
		rb_ary_push(children, prefix);

	char prefix_buffer[1024];

	/* walk_all_paths writes at prefix_size and prefix_size + 1, so both
	 * indexes must lie inside the buffer before it is handed over. */
	if(prefix_size < 0 || prefix_size > (int)sizeof(prefix_buffer) - 2) {
		trie_state_free(state);
		return children;
	}

	memcpy(prefix_buffer, char_prefix, prefix_size);
	prefix_buffer[prefix_size] = 0;

	walk_all_paths(trie, children, state, prefix_buffer, prefix_size);

	trie_state_free(state);
	return children;
}
Esempio n. 3
0
/*
 * Report whether the trie state wrapped by `s` is terminal.
 * A NULL wrapper is answered with FALSE instead of being dereferenced.
 */
Bool
sb_trie_state_is_terminal (const SBTrieState *s)
{
    if (s) {
        return trie_state_is_terminal (s->trie_state);
    }

    return FALSE;
}
Esempio n. 4
0
File: trie.c Progetto: kritik/trie
/*
 * Depth-first enumeration of every key reachable from `state`.
 *
 * trie        - the trie being walked (not used directly by this function).
 * children    - Ruby array that receives one String per key found.
 * state       - state to start from; it is only read and cloned, never advanced.
 * prefix      - NUL-terminated scratch buffer holding the key built so far.
 *               The caller must provide room for the longest key below `state`
 *               plus the terminator; this function cannot check the capacity.
 * prefix_size - number of bytes of `prefix` currently in use.
 *
 * Returns `children`.
 */
static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
	int c;

	for(c = 1; c < 256; c++) {
		if(!trie_state_is_walkable(state, c))
			continue;

		/* Walk on a clone so `state` stays valid for the remaining siblings. */
		TrieState *next_state = trie_state_clone(state);
		trie_state_walk(next_state, c);

		prefix[prefix_size] = (char)c;
		prefix[prefix_size + 1] = 0;

		/* rb_str_new2 copies the bytes, so the scratch buffer is passed
		 * directly; a temporary heap copy would never be released. */
		if(trie_state_is_terminal(next_state))
			rb_ary_push(children, rb_str_new2(prefix));

		walk_all_paths(trie, children, next_state, prefix, prefix_size + 1);

		/* Restore the buffer before trying the next sibling byte. */
		prefix[prefix_size] = 0;
		trie_state_free(next_state);
	}

	return children;
}
Esempio n. 5
0
File: trie.c Progetto: kritik/trie
/*
 * call-seq:
 *   terminal? -> true/nil
 *
 * Tells whether this node sits at the end of a key: true when it does, nil
 * otherwise.  For example, with the keys "he" and "hello" in the Trie, walking
 * all the way to the end of "hello" gives true for terminal? at both the "e"
 * and the "o".
 *
 */
static VALUE rb_trie_node_terminal(VALUE self) {
    TrieState *node_state;
    Data_Get_Struct(self, TrieState, node_state);

    if(trie_state_is_terminal(node_state))
        return Qtrue;

    return Qnil;
}
Esempio n. 6
0
/*
 * Try to break the text `s` (length `len`) into dictionary words, exploring
 * candidate paths kept in a pool of nodes.
 *
 * s            - text to break; each character is converted with th_tis2uni
 *                before being fed to the dictionary state.
 * len          - number of characters in `s`.
 * brkpos_hints - per-position flags; a terminal dictionary state only ends
 *                the inner walk where the hint at the current position is
 *                non-zero.
 * pos          - output array receiving the break positions of the best
 *                path found.
 * n            - maximum number of break positions wanted.
 *
 * Returns the number of break positions copied into `pos` (0 if no path
 * completed).  The search stops as soon as a path yields exactly `n`
 * positions.
 */
static int
brk_recover_try (const thchar_t *s, int len,
                 const char *brkpos_hints,
                 int pos[], size_t n)
{
    BrkPool     *pool;
    BrkPool     *node;
    int          ret;

    pool = brk_root_pool (n);
    ret = 0;

    /* process nodes until the pool hands out no more of them */
    while (NULL != (node = brk_pool_get_node (pool))) {
        BrkShot *shot = &node->shot;
        BrkPool *match;
        int      is_keep_node, is_terminal;

        /* walk dictionary character-wise till a word is matched */
        is_keep_node = 1;
        do {
            if (!trie_state_walk (shot->dict_state,
                                  th_tis2uni (s[shot->str_pos++])))
            {
                /* dictionary has no such transition: this path is dead */
                is_terminal = 0;
                is_keep_node = 0;
                break;
            }

            is_terminal = trie_state_is_terminal (shot->dict_state);
            if (shot->str_pos >= len) {
                /* end of text: only acceptable if it also ends a word */
                if (!is_terminal) {
                    is_keep_node = 0;
                }
                break;
            }
        } while (!(is_terminal && brkpos_hints[shot->str_pos]));

        if (!is_keep_node) {
            pool = brk_pool_delete (pool, node);
            continue;
        }

        /* if node still kept, mark break position and rewind dictionary */
        if (is_terminal) {
            if (shot->str_pos < len &&
                !trie_state_is_single (shot->dict_state))
            {
                /* add node to mark break position instead of current */
                node = brk_pool_node_new (shot);
                pool = brk_pool_add (pool, node);
                shot = &node->shot;
            }

            trie_state_rewind (shot->dict_state);
            shot->brk_pos [shot->cur_brk_pos++] = shot->str_pos;
        }

        if (shot->str_pos == len || shot->cur_brk_pos == n) {
            /* path is done; get result & remove it */
            if (shot->cur_brk_pos > ret) {
                /* keep the path that produced the most break positions */
                ret = shot->cur_brk_pos;
                memcpy (pos, shot->brk_pos, ret * sizeof (pos[0]));
            }
            pool = brk_pool_delete (pool, node);
            /* stop as soon as first solution is found */
            if (ret == n)
                break;
        } else {
            /* find matched nodes, contest and keep the best one */
            /* NOTE(review): every node reported by brk_pool_match is dropped
             * here and `node` is always kept; the matching criterion lives
             * in brk_pool_match and is not visible from this function. */
            while (NULL != (match = brk_pool_match (pool, node))) {
                pool = brk_pool_delete (pool, match);
            }
        }
    }

    brk_pool_free (pool);
    return ret;
}
Esempio n. 7
0
static int
brk_maximal_do_impl (const thchar_t *s, int len,
                     const char *brkpos_hints,
                     int pos[], size_t n)
{
    BrkPool     *pool;
    BrkPool     *node;
    BestBrk     *best_brk;
    RecovHist    recov_hist;
    int          ret;

    pool = brk_root_pool (n);
    best_brk = best_brk_new (n);
    if (!best_brk)
        return 0;
    recov_hist.pos = recov_hist.recov = -1;

    while (NULL != (node = brk_pool_get_node (pool))) {
        BrkShot *shot = &node->shot;
        BrkPool *match;
        int      is_keep_node, is_terminal, is_recovered;
        int      str_pos;

        /* walk dictionary character-wise till a word is matched */
        is_keep_node = 1;
        is_recovered = 0;
        str_pos = shot->str_pos;
        do {
            if (!trie_state_walk (shot->dict_state, th_tis2uni (s[str_pos++])))
            {
                int recovered;

                is_terminal = 0;

                /* try to recover from error */
                recovered = brk_recover (s, len, shot->str_pos + 1,
                                         brkpos_hints, &recov_hist);
                if (-1 != recovered) {
                    /* add penalty by recovered - recent break pos */
                    shot->penalty += recovered;
                    if (shot->cur_brk_pos > 0)
                        shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1];

                    str_pos = recovered;
                    is_recovered = 1;
                } else {
                    /* add penalty with string len - recent break pos */
                    shot->penalty += len;
                    if (shot->cur_brk_pos > 0)
                        shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1];

                    shot->brk_pos [shot->cur_brk_pos++] = str_pos = len;
                    is_keep_node = 0;
                }
                break;
            }

            is_terminal = trie_state_is_terminal (shot->dict_state);
            if (str_pos >= len) {
                if (!is_terminal) {
                    /* add penalty with string len - recent break pos */
                    shot->penalty += len;
                    if (shot->cur_brk_pos > 0)
                        shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1];

                    shot->brk_pos [shot->cur_brk_pos++] = len;
                    is_keep_node = 0;
                }
                break;
            }
        } while (!(is_terminal && brkpos_hints[str_pos]));

        shot->str_pos = str_pos;

        /* if node still kept, mark break position and rewind dictionary */
        if (is_keep_node && (is_terminal || is_recovered)) {
            if (shot->str_pos < len && is_terminal &&
                !trie_state_is_single (shot->dict_state))
            {
                /* add node to mark break position instead of current */
                node = brk_pool_node_new (shot);
                pool = brk_pool_add (pool, node);
                shot = &node->shot;
            }

            trie_state_rewind (shot->dict_state);
            shot->brk_pos [shot->cur_brk_pos++] = shot->str_pos;
        }

        if (!is_keep_node || shot->str_pos == len || shot->cur_brk_pos >= n) {
            /* path is done; contest and remove */
            best_brk_contest (best_brk, shot);
            pool = brk_pool_delete (pool, node);
        } else {
            /* find matched nodes, contest and keep the best one */
            while (NULL != (match = brk_pool_match (pool, node))) {
                BrkPool *del_node;

                if (match->shot.penalty < node->shot.penalty ||
                    (match->shot.penalty == node->shot.penalty &&
                     match->shot.cur_brk_pos < node->shot.cur_brk_pos))
                {
                    del_node = node;
                    node = match;
                } else {
                    del_node = match;
                }
                pool = brk_pool_delete (pool, del_node);
            }
        }
    }

    ret = best_brk->cur_brk_pos;
    memcpy (pos, best_brk->brk_pos, ret * sizeof (pos[0]));

    brk_pool_free (pool);
    best_brk_free (best_brk);
    return ret;
}
Esempio n. 8
0
/*
 * Report whether the trie state wrapped by `s` is terminal.
 *
 * A NULL `s` is reported as not terminal (zero) instead of being
 * dereferenced; otherwise the result is 1 when trie_state_is_terminal
 * reports a non-zero value for the wrapped state and 0 when it does not.
 */
Bool
sb_trie_state_is_terminal (const SBTrieState *s)
{
    /* && short-circuits, so s->trie_state is only read when s is non-NULL */
    return s && trie_state_is_terminal (s->trie_state);
}