Beispiel #1
0
/*
 * Recursively collects every key reachable below `state`, together with the
 * data stored for it.
 *
 * trie        - the trie being walked (only passed through to the recursion).
 * children    - Ruby array that receives one [key, value] tuple per terminal
 *               state found.
 * state       - state to start from; it is not modified, each branch is
 *               explored on a clone.
 * prefix      - scratch buffer holding the key bytes walked so far.  This
 *               level writes prefix[prefix_size] and prefix[prefix_size + 1],
 *               and each recursion level goes one byte further, so the caller
 *               must supply a buffer large enough for the deepest key.
 * prefix_size - number of key bytes currently in `prefix`.
 *
 * Returns `children`.
 */
static VALUE walk_all_paths_with_values(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
	int c;
	for(c = 1; c < 256; c++) {
		if(!trie_state_is_walkable(state, c))
			continue;

		TrieState *next_state = trie_state_clone(state);
		trie_state_walk(next_state, c);

		/* extend the key by one byte and keep it NUL-terminated */
		prefix[prefix_size] = c;
		prefix[prefix_size + 1] = 0;

		if(trie_state_is_terminal(next_state)) {
			/* step over the terminator on a clone so next_state stays usable */
			TrieState *end_state = trie_state_clone(next_state);
			trie_state_walk(end_state, '\0');

			/*
			 * rb_str_new2 copies the NUL-terminated bytes, so the scratch
			 * buffer can be handed over directly; no temporary heap copy is
			 * needed (the previous malloc'd copy was never freed).
			 */
			VALUE tuple = rb_ary_new();
			rb_ary_push(tuple, rb_str_new2(prefix));

			TrieData trie_data = trie_state_get_data(end_state);
			rb_ary_push(tuple, (VALUE)trie_data);
			rb_ary_push(children, tuple);

			trie_state_free(end_state);
		}

		walk_all_paths_with_values(trie, children, next_state, prefix, prefix_size + 1);

		/* restore the buffer for the next sibling byte */
		prefix[prefix_size] = 0;
		trie_state_free(next_state);
	}

	return children;
}
Beispiel #2
0
/*
 * call-seq:
 *   children(prefix) -> [ key, ... ]
 *
 * Finds all keys in the Trie beginning with the given prefix.
 *
 * Returns an empty array when prefix is nil or when the prefix cannot be
 * walked in the trie.  If the prefix itself is a key it is included in the
 * result.
 *
 */
static VALUE rb_trie_children(VALUE self, VALUE prefix) {
	if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

	Trie *trie;
	Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
	TrieState *state = trie_root(trie);
	VALUE children = rb_ary_new();
	TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);

	/* walk the prefix byte by byte, stopping at its first NUL byte */
	const TrieChar *iterator = char_prefix;
	while(*iterator != 0) {
		if(!trie_state_is_walkable(state, *iterator)) {
			/* release the root state on this path too (it used to leak) */
			trie_state_free(state);
			return children;
		}
		trie_state_walk(state, *iterator);
		iterator++;
	}

	if(trie_state_is_terminal(state))
		rb_ary_push(children, prefix);

	char prefix_buffer[1024];

	/*
	 * walk_all_paths writes prefix_buffer[prefix_size] and
	 * prefix_buffer[prefix_size + 1], so the prefix is only copied and
	 * descended into when those two bytes fit.  A longer prefix used to
	 * overflow the stack buffer; now the keys collected so far are returned.
	 * NOTE(review): the recursion appends one byte per level without a bound,
	 * so keys longer than the buffer can still overrun it -- confirm the
	 * maximum key length, or pass the buffer size down to walk_all_paths.
	 */
	if(RSTRING_LEN(prefix) + 2 <= (long)sizeof(prefix_buffer)) {
		memcpy(prefix_buffer, char_prefix, prefix_size);
		prefix_buffer[prefix_size] = 0;

		walk_all_paths(trie, children, state, prefix_buffer, prefix_size);
	}

	trie_state_free(state);
	return children;
}
Beispiel #3
0
/*
 * call-seq:
 *   walk(letter) -> TrieNode
 *
 * Tries to walk down a particular branch of the Trie.  The node is duplicated
 * (via its Ruby-level #dup) and the walk is performed on the duplicate, which
 * is returned on success.
 *
 * Returns nil when the argument's Ruby-level length is not exactly 1, or when
 * any byte of it cannot be walked.
 *
 */
static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
	StringValue(rchar);

	VALUE new_node = rb_funcall(self, rb_intern("dup"), 0);

	TrieState *state;
	Data_Get_Struct(new_node, TrieState, state);

	/* the length check uses String#length, while the walk below is per byte */
	long char_count = NUM2LONG(rb_funcall(rchar, rb_intern("length"), 0));
	if (char_count != 1)
		return Qnil;

	const TrieChar *bytes = (TrieChar*)RSTRING_PTR(rchar);
	long i;
	for (i = 0; i < RSTRING_LEN(rchar); i++) {
		if (!trie_state_walk(state, bytes[i]))
			return Qnil;
	}

	return new_node;
}
Beispiel #4
0
/*
 * Walks the underlying trie state of `s` by one character.
 *
 * `c` is converted to a UniChar, mapped through the alpha map of the owning
 * trie, and the mapped value is passed to trie_state_walk().  Returns whatever
 * trie_state_walk() returns.
 */
Bool
sb_trie_state_walk (SBTrieState *s, SBChar c)
{
    UniChar uni = (UniChar) c;

    return trie_state_walk (
        s->trie_state,
        alpha_map_char_to_alphabet (s->sb_trie->alpha_map, uni));
}
Beispiel #5
0
/*
 * call-seq:
 *   value
 *
 * Attempts to get the value at this node of the Trie.  This only works if the node is a terminal
 * (i.e. end of a key), otherwise it returns nil.
 *
 */
static VALUE rb_trie_node_value(VALUE self) {
	TrieState *state;
	Data_Get_Struct(self, TrieState, state);

	/* work on a clone so the node's own state is left where it is */
	TrieState *dup = trie_state_clone(state);

	/*
	 * Only read the data when the terminator could actually be walked.
	 * Previously the walk result was ignored and the data was read from
	 * wherever the clone happened to be, which does not match the documented
	 * "otherwise it returns nil".
	 */
	TrieData trie_data = TRIE_DATA_ERROR;
	if(trie_state_walk(dup, 0))
		trie_data = trie_state_get_data(dup);

	trie_state_free(dup);

	return TRIE_DATA_ERROR == trie_data ? Qnil : (VALUE)trie_data;
}
Beispiel #6
0
/*
 * Reports whether TRIE_CHAR_TERM can be walked from state `s`.
 *
 * The walk is attempted on a clone, so `s` itself is not modified.  Returns
 * the result of that walk.
 */
Bool
trie_state_is_leaf (const TrieState *s)
{
    TrieState *probe = trie_state_clone (s);
    Bool       walked = trie_state_walk (probe, TRIE_CHAR_TERM);

    trie_state_free (probe);
    return walked;
}
Beispiel #7
0
/*
 * call-seq:
 *   walk!(letter) -> TrieNode
 *
 * Tries to walk down a particular branch of the Trie, modifying the node it
 * is called on.
 *
 * Returns self on success.  Returns nil when the argument is not exactly one
 * byte long or the byte cannot be walked from the current state.
 *
 */
static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) {
	StringValue(rchar);

	TrieState *state;
	Data_Get_Struct(self, TrieState, state);

	if(RSTRING_LEN(rchar) != 1)
		return Qnil;

	if(!trie_state_walk(state, *RSTRING_PTR(rchar)))
		return Qnil;

	/* record the last letter walked and extend the accumulated path */
	rb_iv_set(self, "@state", rchar);

	VALUE path = rb_iv_get(self, "@full_state");
	rb_str_append(path, rchar);
	rb_iv_set(self, "@full_state", path);

	return self;
}
Beispiel #8
0
/*
 * call-seq:
 *   walk(letter) -> TrieNode
 *
 * Tries to walk down a particular branch of the Trie.  The node is duplicated
 * (via its Ruby-level #dup) and the walk is performed on the duplicate.
 *
 * Returns the duplicate on success.  Returns nil when the argument is not
 * exactly one byte long or the byte cannot be walked.
 *
 */
static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
	StringValue(rchar);

	VALUE new_node = rb_funcall(self, rb_intern("dup"), 0);

	TrieState *state;
	Data_Get_Struct(new_node, TrieState, state);

	if(RSTRING_LEN(rchar) != 1)
		return Qnil;

	if(!trie_state_walk(state, *RSTRING_PTR(rchar)))
		return Qnil;

	/* record the last letter walked and extend the accumulated path */
	rb_iv_set(new_node, "@state", rchar);

	VALUE path = rb_iv_get(new_node, "@full_state");
	rb_str_append(path, rchar);
	rb_iv_set(new_node, "@full_state", path);

	return new_node;
}
Beispiel #9
0
/*
 * Recursively collects every key reachable below `state`.
 *
 * trie        - the trie being walked (only passed through to the recursion).
 * children    - Ruby array that receives one string per terminal state found.
 * state       - state to start from; it is not modified, each branch is
 *               explored on a clone.
 * prefix      - scratch buffer holding the key bytes walked so far.  This
 *               level writes prefix[prefix_size] and prefix[prefix_size + 1],
 *               and each recursion level goes one byte further, so the caller
 *               must supply a buffer large enough for the deepest key.
 * prefix_size - number of key bytes currently in `prefix`.
 *
 * Returns `children`.
 */
static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
	int c;
	for(c = 1; c < 256; c++) {
		if(!trie_state_is_walkable(state, c))
			continue;

		TrieState *next_state = trie_state_clone(state);
		trie_state_walk(next_state, c);

		/* extend the key by one byte and keep it NUL-terminated */
		prefix[prefix_size] = c;
		prefix[prefix_size + 1] = 0;

		/*
		 * rb_str_new2 copies the NUL-terminated bytes, so the scratch buffer
		 * can be handed over directly; no temporary heap copy is needed (the
		 * previous malloc'd copy was never freed).
		 */
		if(trie_state_is_terminal(next_state))
			rb_ary_push(children, rb_str_new2(prefix));

		walk_all_paths(trie, children, next_state, prefix, prefix_size + 1);

		/* restore the buffer for the next sibling byte */
		prefix[prefix_size] = 0;
		trie_state_free(next_state);
	}

	return children;
}
Beispiel #10
0
/*
 * Tries to break s[0..len) into dictionary words, exploring candidate paths
 * kept in a pool of BrkPool nodes.
 *
 * s            - input text; each character is converted with th_tis2uni()
 *                before being walked in the dictionary state.
 * len          - number of characters in s.
 * brkpos_hints - indexed by string position; a word match is only accepted as
 *                a break where the entry is non-zero.
 * pos          - output array receiving the break positions of the best path.
 * n            - maximum number of break positions wanted.
 *
 * Returns the number of break positions copied into pos (0 if no path
 * completed).  The search stops early once a path with n breaks is found.
 */
static int
brk_recover_try (const thchar_t *s, int len,
                 const char *brkpos_hints,
                 int pos[], size_t n)
{
    BrkPool     *pool;
    BrkPool     *node;
    int          ret;

    pool = brk_root_pool (n);
    ret = 0;

    while (NULL != (node = brk_pool_get_node (pool))) {
        BrkShot *shot = &node->shot;
        BrkPool *match;
        int      is_keep_node, is_terminal;

        /* walk dictionary character-wise till a word is matched */
        is_keep_node = 1;
        do {
            if (!trie_state_walk (shot->dict_state,
                                  th_tis2uni (s[shot->str_pos++])))
            {
                /* no dictionary word continues with this character */
                is_terminal = 0;
                is_keep_node = 0;
                break;
            }

            is_terminal = trie_state_is_terminal (shot->dict_state);
            if (shot->str_pos >= len) {
                /* end of input: the path is only valid if it ends on a word */
                if (!is_terminal) {
                    is_keep_node = 0;
                }
                break;
            }
        } while (!(is_terminal && brkpos_hints[shot->str_pos]));

        if (!is_keep_node) {
            pool = brk_pool_delete (pool, node);
            continue;
        }

        /* if node still kept, mark break position and rewind dictionary */
        if (is_terminal) {
            if (shot->str_pos < len &&
                !trie_state_is_single (shot->dict_state))
            {
                /* add node to mark break position instead of current */
                node = brk_pool_node_new (shot);
                pool = brk_pool_add (pool, node);
                shot = &node->shot;
            }

            trie_state_rewind (shot->dict_state);
            shot->brk_pos [shot->cur_brk_pos++] = shot->str_pos;
        }

        if (shot->str_pos == len || shot->cur_brk_pos == n) {
            /* path is done; get result & remove it */
            if (shot->cur_brk_pos > ret) {
                ret = shot->cur_brk_pos;
                memcpy (pos, shot->brk_pos, ret * sizeof (pos[0]));
            }
            pool = brk_pool_delete (pool, node);
            /* stop as soon as first solution is found */
            if (ret == n)
                break;
        } else {
            /* find matched nodes, contest and keep the best one */
            while (NULL != (match = brk_pool_match (pool, node))) {
                pool = brk_pool_delete (pool, match);
            }
        }
    }

    /* release whatever nodes are still pooled (e.g. after the early break) */
    brk_pool_free (pool);
    return ret;
}
Beispiel #11
0
static int
brk_maximal_do_impl (const thchar_t *s, int len,
                     const char *brkpos_hints,
                     int pos[], size_t n)
{
    BrkPool     *pool;
    BrkPool     *node;
    BestBrk     *best_brk;
    RecovHist    recov_hist;
    int          ret;

    pool = brk_root_pool (n);
    best_brk = best_brk_new (n);
    if (!best_brk)
        return 0;
    recov_hist.pos = recov_hist.recov = -1;

    while (NULL != (node = brk_pool_get_node (pool))) {
        BrkShot *shot = &node->shot;
        BrkPool *match;
        int      is_keep_node, is_terminal, is_recovered;
        int      str_pos;

        /* walk dictionary character-wise till a word is matched */
        is_keep_node = 1;
        is_recovered = 0;
        str_pos = shot->str_pos;
        do {
            if (!trie_state_walk (shot->dict_state, th_tis2uni (s[str_pos++])))
            {
                int recovered;

                is_terminal = 0;

                /* try to recover from error */
                recovered = brk_recover (s, len, shot->str_pos + 1,
                                         brkpos_hints, &recov_hist);
                if (-1 != recovered) {
                    /* add penalty by recovered - recent break pos */
                    shot->penalty += recovered;
                    if (shot->cur_brk_pos > 0)
                        shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1];

                    str_pos = recovered;
                    is_recovered = 1;
                } else {
                    /* add penalty with string len - recent break pos */
                    shot->penalty += len;
                    if (shot->cur_brk_pos > 0)
                        shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1];

                    shot->brk_pos [shot->cur_brk_pos++] = str_pos = len;
                    is_keep_node = 0;
                }
                break;
            }

            is_terminal = trie_state_is_terminal (shot->dict_state);
            if (str_pos >= len) {
                if (!is_terminal) {
                    /* add penalty with string len - recent break pos */
                    shot->penalty += len;
                    if (shot->cur_brk_pos > 0)
                        shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1];

                    shot->brk_pos [shot->cur_brk_pos++] = len;
                    is_keep_node = 0;
                }
                break;
            }
        } while (!(is_terminal && brkpos_hints[str_pos]));

        shot->str_pos = str_pos;

        /* if node still kept, mark break position and rewind dictionary */
        if (is_keep_node && (is_terminal || is_recovered)) {
            if (shot->str_pos < len && is_terminal &&
                !trie_state_is_single (shot->dict_state))
            {
                /* add node to mark break position instead of current */
                node = brk_pool_node_new (shot);
                pool = brk_pool_add (pool, node);
                shot = &node->shot;
            }

            trie_state_rewind (shot->dict_state);
            shot->brk_pos [shot->cur_brk_pos++] = shot->str_pos;
        }

        if (!is_keep_node || shot->str_pos == len || shot->cur_brk_pos >= n) {
            /* path is done; contest and remove */
            best_brk_contest (best_brk, shot);
            pool = brk_pool_delete (pool, node);
        } else {
            /* find matched nodes, contest and keep the best one */
            while (NULL != (match = brk_pool_match (pool, node))) {
                BrkPool *del_node;

                if (match->shot.penalty < node->shot.penalty ||
                    (match->shot.penalty == node->shot.penalty &&
                     match->shot.cur_brk_pos < node->shot.cur_brk_pos))
                {
                    del_node = node;
                    node = match;
                } else {
                    del_node = match;
                }
                pool = brk_pool_delete (pool, del_node);
            }
        }
    }

    ret = best_brk->cur_brk_pos;
    memcpy (pos, best_brk->brk_pos, ret * sizeof (pos[0]));

    brk_pool_free (pool);
    best_brk_free (best_brk);
    return ret;
}
Beispiel #12
0
// Walks the trie state referenced by `pos` one step along the code unit of `c`.
// Returns true when trie_state_walk() reports a result equal to true.
bool ATTrie :: walk(ATTrie::Position pos, QChar c)
{
    TrieState *state = (TrieState*)pos;
    Bool walked = trie_state_walk(state, c.unicode());

    return walked == true;
}