/*
 * Move every child of state s from its slot under the current BASE[s]
 * to the matching slot under new_base.
 *
 * For each outgoing symbol of s:
 *   - the destination cell is taken off the free list and receives the
 *     child's CHECK (= s) and BASE value;
 *   - if the child's BASE is positive (i.e. not a TAIL pointer), every cell
 *     whose CHECK names the old child is re-pointed at the new child;
 *   - the old child cell is returned to the free list.
 * Finally BASE[s] is set to new_base.
 *
 * The caller is expected to pass a new_base whose target cells are all free;
 * this function does not verify that.
 */
static void
da_relocate_base (DArray *d, TrieIndex s, TrieIndex new_base)
{
    TrieIndex   old_base;
    Symbols    *symbols;
    int         i;

    old_base = da_get_base (d, s);
    symbols = da_output_symbols (d, s);

    for (i = 0; i < symbols_num (symbols); i++) {
        TrieIndex   label;
        TrieIndex   old_next, new_next, old_next_base;

        label = symbols_get (symbols, i);
        old_next = old_base + label;
        new_next = new_base + label;
        old_next_base = da_get_base (d, old_next);

        /* allocate new next node and copy BASE value */
        da_alloc_cell (d, new_next);
        da_set_check (d, new_next, s);
        da_set_base (d, new_next, old_next_base);

        /* old_next node is now moved to new_next, so all cells belonging
         * to old_next must be given to new_next.
         * A non-positive BASE is skipped: it is a TAIL pointer (or unset)
         * and has no child cells to re-parent.
         */
        if (old_next_base > 0) {
            TrieIndex   c, max_c;

            /* max_c is the largest usable label: it never pushes
             * old_next_base + c beyond TRIE_INDEX_MAX.  The scan is
             * inclusive so that label TRIE_CHAR_MAX is re-parented too.
             */
            max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - old_next_base);
            for (c = 0; c <= max_c; c++) {
                if (da_get_check (d, old_next_base + c) == old_next)
                    da_set_check (d, old_next_base + c, new_next);
            }
        }

        /* free old_next node */
        da_free_cell (d, old_next);
    }

    symbols_free (symbols);

    /* finally, make BASE[s] point to new_base */
    da_set_base (d, s, new_base);
}
/*
 * Depth-first enumeration of the sub-trie rooted at `state`.
 *
 * A state with negative BASE is reported to enum_func together with its
 * reconstructed key (the key buffer is freed right after the callback).
 * Any other state is expanded: each child is visited in the order produced
 * by da_output_symbols().
 *
 * Returns the last callback result; traversal stops as soon as a callback
 * (or a nested call) returns FALSE.  A state with no negative BASE and no
 * children yields TRUE.
 */
static Bool
da_enumerate_recursive (const DArray *d,
                        TrieIndex     state,
                        DAEnumFunc    enum_func,
                        void         *user_data)
{
    const TrieIndex state_base = da_get_base (d, state);
    Symbols        *children;
    Bool            keep_going;
    int             n;

    if (state_base < 0) {
        /* separate node: hand its key to the callback */
        TrieChar *state_key = da_get_state_key (d, state);

        keep_going = (*enum_func) (state_key, state, user_data);
        free (state_key);
        return keep_going;
    }

    /* internal node: recurse into every child until told to stop */
    keep_going = TRUE;
    children = da_output_symbols (d, state);
    n = 0;
    while (keep_going && n < symbols_num (children)) {
        keep_going = da_enumerate_recursive (d,
                                             state_base
                                                 + symbols_get (children, n),
                                             enum_func, user_data);
        n++;
    }
    symbols_free (children);

    return keep_going;
}
/** * @brief Find next separate node in a sub-trie * * @param d : the double-array structure * @param root : the sub-trie root to search from * @param sep : the current separate node * @param keybuff : the TrieString buffer for incrementally calcuating key * * @return index to the next separate node; TRIE_INDEX_ERROR if no more * separate node is found * * Find the next separate node under a sub-trie rooted at @a root starting * from the current separate node @a sep. * * On return, @a keybuff is incrementally updated from the key which walks * to previous separate node to the one which walks to the new separate node. * So, it is assumed to be initialized by at least one da_first_separate() * call before. This incremental key calculation is more efficient than later * totally reconstructing key from the given separate node. * * Available since: 0.2.6 */ TrieIndex da_next_separate (DArray *d, TrieIndex root, TrieIndex sep, TrieString *keybuff) { TrieIndex parent; TrieIndex base; TrieIndex c, max_c; while (sep != root) { parent = da_get_check (d, sep); base = da_get_base (d, parent); c = sep - base; trie_string_cut_last (keybuff); /* find next sibling of sep */ max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base); while (++c <= max_c) { if (da_get_check (d, base + c) == parent) { trie_string_append_char (keybuff, c); return da_first_separate (d, base + c, keybuff); } } sep = parent; } return TRIE_INDEX_ERROR; }
/**
 * @brief Insert a branch from trie node
 *
 * @param d : the double-array structure
 * @param s : the state to add branch to
 * @param c : the character for the branch label
 *
 * @return the index of the new node; TRIE_INDEX_ERROR if no free base
 *         could be found for the state's children
 *
 * Insert a new arc labelled with character @a c from the trie node
 * represented by index @a s in double-array structure @a d.
 * If the arc already exists, nothing is inserted and the index of the
 * existing target node is returned.
 *
 * When the natural slot BASE[s] + c is out of range or occupied, all
 * children of @a s are relocated to a base where every needed slot is free.
 */
TrieIndex
da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
{
    TrieIndex   base, next;

    base = da_get_base (d, s);

    if (base > 0) {
        /* Decide about overflow BEFORE forming base + c: the sum must not
         * be evaluated (nor used as a cell index) when it would exceed
         * TRIE_INDEX_MAX.
         */
        int must_relocate = (base > TRIE_INDEX_MAX - c);

        if (!must_relocate) {
            next = base + c;

            /* if already there, do not actually insert */
            if (da_get_check (d, next) == s)
                return next;

            /* an occupied slot also forces relocation */
            must_relocate = !da_check_free_cell (d, next);
        }

        if (must_relocate) {
            Symbols    *symbols;
            TrieIndex   new_base;

            /* relocate BASE[s]: find a base fitting the existing
             * children plus the new label */
            symbols = da_output_symbols (d, s);
            symbols_add (symbols, c);
            new_base = da_find_free_base (d, symbols);
            symbols_free (symbols);

            if (TRIE_INDEX_ERROR == new_base)
                return TRIE_INDEX_ERROR;

            da_relocate_base (d, s, new_base);
            next = new_base + c;
        }
    } else {
        Symbols    *symbols;
        TrieIndex   new_base;

        /* s has no positive base yet: pick one for the single new label */
        symbols = symbols_new ();
        symbols_add (symbols, c);
        new_base = da_find_free_base (d, symbols);
        symbols_free (symbols);

        if (TRIE_INDEX_ERROR == new_base)
            return TRIE_INDEX_ERROR;

        da_set_base (d, s, new_base);
        next = new_base + c;
    }

    /* claim the target cell and link it to its parent */
    da_alloc_cell (d, next);
    da_set_check (d, next, s);

    return next;
}
/**
 * @brief Walk in double-array structure
 *
 * @param d : the double-array structure
 * @param s : current state
 * @param c : the input character
 *
 * @return boolean indicating success
 *
 * Try to follow the arc labelled @a c out of state @a *s.  The candidate
 * target is BASE[*s] + c; the arc exists when CHECK of that cell equals
 * @a *s.  On success @a *s is advanced to the target and TRUE is returned;
 * otherwise FALSE is returned and @a *s is left unchanged.
 */
Bool
da_walk (const DArray *d, TrieIndex *s, TrieChar c)
{
    const TrieIndex from = *s;
    const TrieIndex target = da_get_base (d, from) + c;

    if (da_get_check (d, target) != from)
        return FALSE;

    *s = target;
    return TRUE;
}
/*
 * Unlink `cell` from the free list.
 *
 * A free cell keeps the negated index of its predecessor in BASE and the
 * negated index of its successor in CHECK; the two neighbours are joined
 * to each other here.  The cell's own BASE/CHECK are left for the caller
 * to overwrite.
 */
static void
da_alloc_cell (DArray *d, TrieIndex cell)
{
    const TrieIndex before = -da_get_base (d, cell);
    const TrieIndex after = -da_get_check (d, cell);

    /* bypass the cell in both directions */
    da_set_check (d, before, -after);
    da_set_base (d, after, -before);
}
/*
 * Put `cell` back on the free list.
 *
 * The list is walked from the entry after the list head until either the
 * head is reached again or an entry whose index is not below `cell` is
 * found; `cell` is then linked in just before that entry.  Links are
 * stored negated: predecessor in BASE, successor in CHECK.
 */
static void
da_free_cell (DArray *d, TrieIndex cell)
{
    const TrieIndex head = da_get_free_list (d);
    TrieIndex       succ, pred;

    /* locate the insertion point */
    for (succ = -da_get_check (d, head);
         succ != head && succ < cell;
         succ = -da_get_check (d, succ))
    {
        /* just advancing */
    }

    pred = -da_get_base (d, succ);

    /* splice cell between pred and succ */
    da_set_check (d, cell, -succ);
    da_set_base (d, cell, -pred);
    da_set_check (d, pred, -cell);
    da_set_base (d, succ, -cell);
}
/*
 * Collect the labels of all arcs leaving state s.
 *
 * Every label c for which CHECK[BASE[s] + c] == s is appended, in ascending
 * order, to a freshly created Symbols set.  The caller owns the returned
 * set and releases it with symbols_free().
 */
static Symbols *
da_output_symbols (const DArray *d, TrieIndex s)
{
    Symbols    *syms;
    TrieIndex   base;
    TrieIndex   c, max_c;

    syms = symbols_new ();

    base = da_get_base (d, s);

    /* max_c is the largest label that keeps base + c within
     * TRIE_INDEX_MAX.  It is itself a valid label, so the scan must be
     * inclusive; otherwise an arc labelled TRIE_CHAR_MAX is never reported.
     */
    max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
    for (c = 0; c <= max_c; c++) {
        if (da_get_check (d, base + c) == s)
            symbols_add_fast (syms, (TrieChar) c);
    }

    return syms;
}
/*
 * Tell whether state s has at least one outgoing arc.
 *
 * Returns FALSE right away when BASE[s] is TRIE_INDEX_ERROR or negative.
 * Otherwise returns TRUE as soon as some label c satisfies
 * CHECK[BASE[s] + c] == s, and FALSE if none does.
 */
static Bool
da_has_children (DArray *d, TrieIndex s)
{
    TrieIndex   base;
    TrieIndex   c, max_c;

    base = da_get_base (d, s);
    if (TRIE_INDEX_ERROR == base || base < 0)
        return FALSE;

    /* max_c is the largest label that keeps base + c within
     * TRIE_INDEX_MAX; scan it inclusively so that a lone child labelled
     * TRIE_CHAR_MAX is still detected.
     */
    max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
    for (c = 0; c <= max_c; c++) {
        if (da_get_check (d, base + c) == s)
            return TRUE;
    }

    return FALSE;
}
/**
 * @brief Find first separate node in a sub-trie
 *
 * @param d       : the double-array structure
 * @param root    : the sub-trie root to search from
 * @param keybuff : the TrieString buffer for incrementally calculating key
 *
 * @return index to the first separate node; TRIE_INDEX_ERROR on any failure
 *
 * Find the first separate node under a sub-trie rooted at @a root, by
 * repeatedly descending through the child with the smallest label until a
 * node with negative BASE is reached.  If a node on the way has
 * non-negative BASE but no child, TRIE_INDEX_ERROR is returned.
 *
 * On return, @a keybuff is appended with the key characters which walk from
 * @a root to the separate node. This is for incrementally calculating the
 * transition key, which is more efficient than later totally reconstructing
 * key from the given separate node.
 *
 * Available since: 0.2.6
 */
TrieIndex
da_first_separate (DArray *d, TrieIndex root, TrieString *keybuff)
{
    TrieIndex   base;
    TrieIndex   c, max_c;

    while ((base = da_get_base (d, root)) >= 0) {
        /* smallest label whose cell is owned by root */
        max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base);
        for (c = 0; c <= max_c; c++) {
            if (da_get_check (d, base + c) == root)
                break;
        }

        /* The scan is inclusive, so running out of labels leaves c one
         * past max_c; c == max_c means a child WAS found at the last label.
         */
        if (c > max_c)
            return TRIE_INDEX_ERROR;

        trie_string_append_char (keybuff, c);
        root = base + c;
    }

    return root;
}
/*
 * Rebuild the key (sequence of arc labels) leading from the trie root to
 * `state`.
 *
 * The labels are collected while climbing from `state` to the root via
 * CHECK, then reversed in place.  The result is a '\0'-terminated buffer
 * allocated with malloc(); the caller must free() it.
 *
 * Returns NULL if memory cannot be allocated.
 */
static TrieChar *
da_get_state_key (const DArray *d, TrieIndex state)
{
    TrieChar   *key;
    int         key_size, key_length;
    int         i, j;

    key_size = 20;
    key_length = 0;
    key = (TrieChar *) malloc (key_size);
    if (!key)
        return NULL;

    /* trace back to root */
    while (da_get_root (d) != state) {
        TrieIndex   parent;

        /* always keep one spare byte for the terminator */
        if (key_length + 1 >= key_size) {
            TrieChar *grown;

            key_size += 20;
            /* keep the old pointer until realloc is known to succeed,
             * so the buffer can be released on failure */
            grown = (TrieChar *) realloc (key, key_size);
            if (!grown) {
                free (key);
                return NULL;
            }
            key = grown;
        }
        parent = da_get_check (d, state);
        /* the arc label is the offset of state from its parent's BASE */
        key[key_length++] = (TrieChar) (state - da_get_base (d, parent));
        state = parent;
    }
    key[key_length] = '\0';

    /* reverse the string: labels were gathered leaf-to-root */
    for (i = 0, j = key_length - 1; i < j; i++, j--) {
        TrieChar temp;

        temp = key[i];
        key[i] = key[j];
        key[j] = temp;
    }

    return key;
}
/*
 * Grow the cell array so that index `to_index` becomes valid.
 *
 * Returns FALSE when to_index is not in (0, TRIE_INDEX_MAX), when the
 * required byte size is not representable, or when memory cannot be
 * obtained; in all those cases the structure is left untouched.
 * Returns TRUE immediately if the array is already large enough.
 *
 * On growth, the new cells [old num_cells .. to_index] are chained into a
 * doubly linked run (negated successor in CHECK, negated predecessor in
 * BASE) which is appended to the tail of the existing circular free list,
 * and the cell count kept in cell 0's CHECK is refreshed.
 */
static Bool
da_extend_pool (DArray *d, TrieIndex to_index)
{
    DACell     *new_cells;
    TrieIndex   new_begin;
    TrieIndex   i;
    TrieIndex   free_tail;

    if (to_index <= 0 || TRIE_INDEX_MAX <= to_index)
        return FALSE;

    if (to_index < d->num_cells)
        return TRUE;

    /* refuse sizes whose byte count would wrap around size_t */
    if ((size_t) to_index + 1 > ((size_t) -1) / sizeof (DACell))
        return FALSE;

    /* realloc into a temporary: on failure the old block is still valid
     * and must remain reachable through d->cells */
    new_cells = (DACell *) realloc (d->cells,
                                    ((size_t) to_index + 1) * sizeof (DACell));
    if (!new_cells)
        return FALSE;
    d->cells = new_cells;

    new_begin = d->num_cells;
    d->num_cells = to_index + 1;

    /* initialize new free list */
    for (i = new_begin; i < to_index; i++) {
        da_set_check (d, i, -(i + 1));
        da_set_base (d, i + 1, -i);
    }

    /* merge the new circular list to the old */
    free_tail = -da_get_base (d, da_get_free_list (d));
    da_set_check (d, free_tail, -new_begin);
    da_set_base (d, new_begin, -free_tail);
    da_set_check (d, to_index, -da_get_free_list (d));
    da_set_base (d, da_get_free_list (d), -to_index);

    /* update header cell */
    d->cells[0].check = d->num_cells;

    return TRUE;
}
/*
 * Return the negation of BASE[s].
 * NOTE(review): presumably only meaningful for states whose BASE is
 * negative, where the negated value indexes the tail -- confirm with callers.
 */
static inline trie_idx_t trie_da_get_tail_index(const_darray_t da, trie_idx_t s)
{
	const trie_idx_t base = da_get_base(da, s);

	return -base;
}
/*------------------------*
 *   INTERNAL FUNCTIONS   *
 *------------------------*/

/* Predicate: true when BASE[s] is negative, i.e. s is a separate node. */
static inline bool trie_da_separate_p(const_darray_t da, trie_idx_t s)
{
	const trie_idx_t base = da_get_base(da, s);

	return base < 0;
}