/*
 * Recursively collects every key reachable below `state` together with the
 * data stored for it.
 *
 * trie        - only forwarded to the recursive call
 * children    - Ruby array that receives one [key, value] pair per key found
 * state       - current position; it is never walked itself, every branch
 *               works on a clone that is freed before moving on
 * prefix      - scratch buffer holding the NUL-terminated key built so far;
 *               the caller must provide room for the deepest key plus NUL
 * prefix_size - number of bytes of `prefix` currently in use
 *
 * Returns `children`.
 */
static VALUE walk_all_paths_with_values(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
    int c;

    for(c = 1; c < 256; c++) {
        if(!trie_state_is_walkable(state, c))
            continue;

        TrieState *next_state = trie_state_clone(state);
        trie_state_walk(next_state, c);

        prefix[prefix_size] = (char)c;
        prefix[prefix_size + 1] = 0;

        if(trie_state_is_terminal(next_state)) {
            /* Step onto the terminator on a separate clone so that
             * next_state can still be used for the recursion below. */
            TrieState *end_state = trie_state_clone(next_state);
            trie_state_walk(end_state, '\0');

            /* rb_str_new2 copies the bytes, so the scratch buffer can be
             * passed directly; no temporary heap copy is needed. */
            VALUE tuple = rb_ary_new();
            rb_ary_push(tuple, rb_str_new2(prefix));

            TrieData trie_data = trie_state_get_data(end_state);
            rb_ary_push(tuple, (VALUE)trie_data);
            rb_ary_push(children, tuple);

            trie_state_free(end_state);
        }

        walk_all_paths_with_values(trie, children, next_state, prefix, prefix_size + 1);

        /* Restore the buffer to the key of the current level. */
        prefix[prefix_size] = 0;
        trie_state_free(next_state);
    }

    return children;
}
/*
 * call-seq:
 *   children(prefix) -> [ key, ... ]
 *
 * Finds all keys in the Trie beginning with the given prefix.
 *
 * Returns an empty array when prefix is nil or when no key starts with it.
 * Raises ArgumentError when the prefix does not fit the internal key buffer.
 *
 */
static VALUE rb_trie_children(VALUE self, VALUE prefix) {
    if(NIL_P(prefix))
        return rb_ary_new();

    StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    char prefix_buffer[1024];
    long prefix_size = RSTRING_LEN(prefix);

    /* walk_all_paths() writes one more key byte and a NUL after the prefix,
     * so two spare bytes are required. Checked before any TrieState is
     * allocated so that raising cannot leak one. */
    if(prefix_size > (long)sizeof(prefix_buffer) - 2)
        rb_raise(rb_eArgError, "prefix too long (%ld bytes)", prefix_size);

    TrieState *state = trie_root(trie);
    VALUE children = rb_ary_new();
    const TrieChar *char_prefix = (const TrieChar*)RSTRING_PTR(prefix);

    /* Descend along the prefix; stop at the first byte that has no branch. */
    const TrieChar *iterator;
    for(iterator = char_prefix; *iterator != 0; iterator++) {
        if(!trie_state_is_walkable(state, *iterator)) {
            trie_state_free(state);
            return children;
        }
        trie_state_walk(state, *iterator);
    }

    /* The prefix itself is a key. */
    if(trie_state_is_terminal(state))
        rb_ary_push(children, prefix);

    memcpy(prefix_buffer, char_prefix, (size_t)prefix_size);
    prefix_buffer[prefix_size] = 0;

    /* NOTE(review): walk_all_paths() extends prefix_buffer by one byte per
     * level without a bound of its own; keys longer than the buffer would
     * still overrun it. */
    walk_all_paths(trie, children, state, prefix_buffer, (int)prefix_size);

    trie_state_free(state);
    return children;
}
/*
 * call-seq:
 *   walk(letter) -> TrieNode
 *
 * Attempts to follow one branch of the Trie. The receiver is duplicated first
 * and only the duplicate is advanced, so the receiver stays where it was.
 * Returns the advanced duplicate, or nil when the argument is not exactly one
 * character long or the branch does not exist.
 *
 */
static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
    TrieState *state;
    const TrieChar *bytes;
    long char_count;
    long offset;
    VALUE new_node;

    StringValue(rchar);
    new_node = rb_funcall(self, rb_intern("dup"), 0);
    Data_Get_Struct(new_node, TrieState, state);

    /* String#length counts characters, so a multi-byte character passes. */
    char_count = (long)NUM2LONG(rb_funcall(rchar, rb_intern("length"), 0));
    if (char_count != 1)
        return Qnil;

    /* Feed every byte of that single character to the trie in order. */
    bytes = (TrieChar*)RSTRING_PTR(rchar);
    for (offset = 0; offset < RSTRING_LEN(rchar); offset++) {
        if (!trie_state_walk(state, bytes[offset]))
            return Qnil;
    }

    return new_node;
}
/*
 * Advances the single-byte trie state `s` by character `c`.
 *
 * The character is widened to UniChar, mapped through the trie's alpha map,
 * and the mapped value is walked on the wrapped trie state.
 * Returns the result of trie_state_walk().
 */
Bool
sb_trie_state_walk (SBTrieState *s, SBChar c)
{
    UniChar wide = (UniChar) c;

    return trie_state_walk (
        s->trie_state,
        alpha_map_char_to_alphabet (s->sb_trie->alpha_map, wide));
}
/*
 * call-seq:
 *   value
 *
 * Attempts to get the value at this node of the Trie.  This only works if the
 * node is a terminal (i.e. end of a key), otherwise it returns nil.
 *
 */
static VALUE rb_trie_node_value(VALUE self) {
    TrieState *state;
    Data_Get_Struct(self, TrieState, state);

    /* Work on a clone so the node's own position is left untouched. */
    TrieState *dup = trie_state_clone(state);

    /* Data is only read after the terminator step succeeded; if the walk
     * fails the clone has not reached a key end and there is no value. */
    TrieData trie_data = TRIE_DATA_ERROR;
    if(trie_state_walk(dup, 0))
        trie_data = trie_state_get_data(dup);

    trie_state_free(dup);

    return TRIE_DATA_ERROR == trie_data ? Qnil : (VALUE)trie_data;
}
/*
 * Reports whether the terminator character can be walked from state `s`.
 *
 * The check is performed on a temporary clone, so `s` is not modified.
 * Returns the result of walking TRIE_CHAR_TERM on that clone.
 */
Bool
trie_state_is_leaf (const TrieState *s)
{
    TrieState *probe = trie_state_clone (s);
    Bool       reached_term = trie_state_walk (probe, TRIE_CHAR_TERM);

    trie_state_free (probe);

    return reached_term;
}
/*
 * call-seq:
 *   walk!(letter) -> TrieNode
 *
 * Attempts to follow one branch of the Trie in place, advancing the receiver
 * itself. On success @state is set to the letter, the letter is appended to
 * @full_state, and the receiver is returned. Returns nil when the argument is
 * not exactly one byte long or the branch does not exist.
 *
 */
static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) {
    TrieState *state;
    VALUE path;

    StringValue(rchar);
    Data_Get_Struct(self, TrieState, state);

    if(RSTRING_LEN(rchar) != 1)
        return Qnil;

    if(!trie_state_walk(state, *RSTRING_PTR(rchar)))
        return Qnil;

    /* Record the step taken on the Ruby side of the node. */
    rb_iv_set(self, "@state", rchar);
    path = rb_iv_get(self, "@full_state");
    rb_str_append(path, rchar);
    rb_iv_set(self, "@full_state", path);

    return self;
}
/*
 * call-seq:
 *   walk(letter) -> TrieNode
 *
 * Attempts to follow one branch of the Trie. The receiver is duplicated via
 * its `dup` method and only the duplicate is advanced. On success the
 * duplicate's @state is set to the letter, the letter is appended to its
 * @full_state, and the duplicate is returned. Returns nil when the argument is
 * not exactly one byte long or the branch does not exist.
 *
 */
static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
    TrieState *state;
    VALUE new_node;
    VALUE path;

    StringValue(rchar);

    /* The copy is made before the argument is validated. */
    new_node = rb_funcall(self, rb_intern("dup"), 0);
    Data_Get_Struct(new_node, TrieState, state);

    if(RSTRING_LEN(rchar) != 1)
        return Qnil;

    if(!trie_state_walk(state, *RSTRING_PTR(rchar)))
        return Qnil;

    /* Record the step taken on the Ruby side of the new node. */
    rb_iv_set(new_node, "@state", rchar);
    path = rb_iv_get(new_node, "@full_state");
    rb_str_append(path, rchar);
    rb_iv_set(new_node, "@full_state", path);

    return new_node;
}
/*
 * Recursively collects every key reachable below `state`.
 *
 * trie        - only forwarded to the recursive call
 * children    - Ruby array that receives one String per key found
 * state       - current position; it is never walked itself, every branch
 *               works on a clone that is freed before moving on
 * prefix      - scratch buffer holding the NUL-terminated key built so far;
 *               the caller must provide room for the deepest key plus NUL
 * prefix_size - number of bytes of `prefix` currently in use
 *
 * Returns `children`.
 */
static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
    int c;

    for(c = 1; c < 256; c++) {
        if(!trie_state_is_walkable(state, c))
            continue;

        TrieState *next_state = trie_state_clone(state);
        trie_state_walk(next_state, c);

        prefix[prefix_size] = (char)c;
        prefix[prefix_size + 1] = 0;

        /* rb_str_new2 copies the bytes, so the scratch buffer can be passed
         * directly; no temporary heap copy is needed. */
        if(trie_state_is_terminal(next_state))
            rb_ary_push(children, rb_str_new2(prefix));

        walk_all_paths(trie, children, next_state, prefix, prefix_size + 1);

        /* Restore the buffer to the key of the current level. */
        prefix[prefix_size] = 0;
        trie_state_free(next_state);
    }

    return children;
}
/*
 * Tries to segment s[0..len) into dictionary words, collecting break
 * positions into pos[].
 *
 * s            - input text; each character is converted with th_tis2uni()
 *                before being walked in the dictionary state
 * len          - number of characters in s
 * brkpos_hints - per-position flags; a word match only counts where the hint
 *                at the following position is non-zero
 * pos          - output array receiving the break positions of the best path
 * n            - limit on break positions; a path is finished once it has n
 *
 * Returns the number of break positions copied into pos (0 if no path
 * finished with any).
 */
static int brk_recover_try (const thchar_t *s, int len, const char *brkpos_hints, int pos[], size_t n)
{
    BrkPool *pool;
    BrkPool *node;
    int ret;

    pool = brk_root_pool (n);
    ret = 0;

    /* process pooled candidate paths until none is left */
    while (NULL != (node = brk_pool_get_node (pool))) {
        BrkShot *shot = &node->shot;
        BrkPool *match;
        int is_keep_node, is_terminal;

        /* walk dictionary character-wise till a word is matched */
        is_keep_node = 1;
        do {
            if (!trie_state_walk (shot->dict_state, th_tis2uni (s[shot->str_pos++]))) {
                /* no dictionary branch for this character: drop the path */
                is_terminal = 0;
                is_keep_node = 0;
                break;
            }
            is_terminal = trie_state_is_terminal (shot->dict_state);
            if (shot->str_pos >= len) {
                /* input exhausted; the path survives only on a word end */
                if (!is_terminal) {
                    is_keep_node = 0;
                }
                break;
            }
        } while (!(is_terminal && brkpos_hints[shot->str_pos]));

        if (!is_keep_node) {
            pool = brk_pool_delete (pool, node);
            continue;
        }

        /* if node still kept, mark break position and rewind dictionary */
        if (is_terminal) {
            if (shot->str_pos < len && !trie_state_is_single (shot->dict_state)) {
                /* add node to mark break position instead of current */
                node = brk_pool_node_new (shot);
                pool = brk_pool_add (pool, node);
                shot = &node->shot;
            }
            trie_state_rewind (shot->dict_state);
            shot->brk_pos [shot->cur_brk_pos++] = shot->str_pos;
        }

        if (shot->str_pos == len || shot->cur_brk_pos == n) {
            /* path is done; get result & remove it */
            if (shot->cur_brk_pos > ret) {
                /* keep the finished path with the most break positions */
                ret = shot->cur_brk_pos;
                memcpy (pos, shot->brk_pos, ret * sizeof (pos[0]));
            }
            pool = brk_pool_delete (pool, node);
            /* stop as soon as first solution is found */
            if (ret == n)
                break;
        } else {
            /* remove every other pooled node that brk_pool_match() reports
             * as matching this one; the current node is the one kept */
            while (NULL != (match = brk_pool_match (pool, node))) {
                pool = brk_pool_delete (pool, match);
            }
        }
    }

    brk_pool_free (pool);
    return ret;
}
static int brk_maximal_do_impl (const thchar_t *s, int len, const char *brkpos_hints, int pos[], size_t n) { BrkPool *pool; BrkPool *node; BestBrk *best_brk; RecovHist recov_hist; int ret; pool = brk_root_pool (n); best_brk = best_brk_new (n); if (!best_brk) return 0; recov_hist.pos = recov_hist.recov = -1; while (NULL != (node = brk_pool_get_node (pool))) { BrkShot *shot = &node->shot; BrkPool *match; int is_keep_node, is_terminal, is_recovered; int str_pos; /* walk dictionary character-wise till a word is matched */ is_keep_node = 1; is_recovered = 0; str_pos = shot->str_pos; do { if (!trie_state_walk (shot->dict_state, th_tis2uni (s[str_pos++]))) { int recovered; is_terminal = 0; /* try to recover from error */ recovered = brk_recover (s, len, shot->str_pos + 1, brkpos_hints, &recov_hist); if (-1 != recovered) { /* add penalty by recovered - recent break pos */ shot->penalty += recovered; if (shot->cur_brk_pos > 0) shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1]; str_pos = recovered; is_recovered = 1; } else { /* add penalty with string len - recent break pos */ shot->penalty += len; if (shot->cur_brk_pos > 0) shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1]; shot->brk_pos [shot->cur_brk_pos++] = str_pos = len; is_keep_node = 0; } break; } is_terminal = trie_state_is_terminal (shot->dict_state); if (str_pos >= len) { if (!is_terminal) { /* add penalty with string len - recent break pos */ shot->penalty += len; if (shot->cur_brk_pos > 0) shot->penalty -= shot->brk_pos[shot->cur_brk_pos - 1]; shot->brk_pos [shot->cur_brk_pos++] = len; is_keep_node = 0; } break; } } while (!(is_terminal && brkpos_hints[str_pos])); shot->str_pos = str_pos; /* if node still kept, mark break position and rewind dictionary */ if (is_keep_node && (is_terminal || is_recovered)) { if (shot->str_pos < len && is_terminal && !trie_state_is_single (shot->dict_state)) { /* add node to mark break position instead of current */ node = brk_pool_node_new (shot); pool = brk_pool_add 
(pool, node); shot = &node->shot; } trie_state_rewind (shot->dict_state); shot->brk_pos [shot->cur_brk_pos++] = shot->str_pos; } if (!is_keep_node || shot->str_pos == len || shot->cur_brk_pos >= n) { /* path is done; contest and remove */ best_brk_contest (best_brk, shot); pool = brk_pool_delete (pool, node); } else { /* find matched nodes, contest and keep the best one */ while (NULL != (match = brk_pool_match (pool, node))) { BrkPool *del_node; if (match->shot.penalty < node->shot.penalty || (match->shot.penalty == node->shot.penalty && match->shot.cur_brk_pos < node->shot.cur_brk_pos)) { del_node = node; node = match; } else { del_node = match; } pool = brk_pool_delete (pool, del_node); } } } ret = best_brk->cur_brk_pos; memcpy (pos, best_brk->brk_pos, ret * sizeof (pos[0])); brk_pool_free (pool); best_brk_free (best_brk); return ret; }
/*
 * Advances the trie position `pos` by the UTF-16 code unit of `c`.
 *
 * `pos` is treated as a TrieState pointer and is modified by the walk.
 * Returns true when trie_state_walk() reports success.
 */
bool ATTrie::walk(ATTrie::Position pos, QChar c)
{
    TrieState *state = (TrieState*)pos;
    const Bool walked = trie_state_walk(state, c.unicode());

    return walked == true;
}