/**
 * Drop failure links that cannot lead to a successful transition.
 *
 * For each child of @a state that has children of its own and whose
 * failure state also has children, the letters of the failure state's
 * children are looked up among the child's own transitions. When the scan
 * ends with a successful lookup (every letter of the failure state is
 * already handled by the child itself), the child's failure link is
 * redirected to the root. Afterwards the same treatment is applied
 * recursively to every child that has children.
 *
 * @param ac_tree the matcher that holds the patterns
 * @param state   the state whose children are examined (the function
 *                recurses through the trie from here)
 */
static void ib_ac_unlink_unuseful(ib_ac_t *ac_tree, ib_ac_state_t *state)
{
    IB_FTRACE_INIT();
    ib_ac_state_t *node;
    ib_ac_state_t *probe;
    /* Result of the most recent lookup. It is intentionally not reset
     * between siblings; the scan below assigns it whenever it runs at
     * least once. */
    ib_ac_state_t *match = NULL;

    /* First pass: prune the failure links of the direct children. */
    node = state->child;
    while (node != NULL) {
        if (node->fail != NULL &&
            node->fail->child != NULL &&
            node->child != NULL)
        {
            probe = node->fail->child;
            while (probe != NULL && probe != ac_tree->root) {
                match = ib_ac_child_for_code(node, probe->letter);
                if (match == NULL) {
                    /* The failure state offers a letter this node lacks,
                     * so the failure link is still useful. */
                    break;
                }
                probe = probe->sibling;
            }

            if (match != NULL) {
                /* Every letter reachable through the failure state is
                 * already a transition of this node, so following the
                 * link could never succeed. Fall back to the root
                 * instead. Only the failure link is changed here. */
                node->fail = ac_tree->root;
            }
        }
        node = node->sibling;
    }

    /* Second pass: descend into every child that has children. */
    node = state->child;
    while (node != NULL) {
        if (node->child != NULL) {
            ib_ac_unlink_unuseful(ac_tree, node);
        }
        node = node->sibling;
    }

    IB_FTRACE_RET_VOID();
}
/**
 * Constructs fail links of branches (the failure transition function).
 *
 * Does nothing if the tree is already flagged as compiled. Otherwise the
 * trie is walked level by level through a work queue (this relies on
 * ib_list_enqueue()/ib_list_dequeue() behaving as a FIFO). The root and
 * its direct children fail to the root. For every deeper state the
 * failure chain of its parent is followed until a state with a transition
 * on the state's letter is found; that transition target becomes the fail
 * link. If the chain reaches the root without a match, the state fails to
 * the root.
 *
 * After the links are built, outputs are linked, useless fail links are
 * removed, the binary trees are built (when the root has children) and
 * the tree is flagged as compiled.
 *
 * @param ac_tree the ac tree matcher
 *
 * @return IB_OK on success (or if already compiled), otherwise the error
 *         status reported by the list functions.
 */
static ib_status_t ib_ac_link_fail_states(ib_ac_t *ac_tree)
{
    ib_status_t rc;

    ib_ac_state_t *child = NULL;
    ib_ac_state_t *state = NULL;
    ib_ac_state_t *goto_state = NULL;
    ib_ac_state_t *candidate = NULL;

    ib_list_t *iter_queue = NULL;

    if (ac_tree->flags & IB_AC_FLAG_PARSER_COMPILED) {
        return IB_OK;
    }

    ac_tree->root->pattern = NULL;

    rc = ib_list_create(&iter_queue, ac_tree->mp);
    if (rc != IB_OK) {
        return rc;
    }

    ac_tree->root->fail = ac_tree->root;

    /* All first-level children will fail back to root state */
    for (child = ac_tree->root->child;
         child != NULL;
         child = child->sibling)
    {
        child->fail = ac_tree->root;
        rc = ib_list_enqueue(iter_queue, (void *) child);
        if (rc != IB_OK) {
            return rc;
        }
    }

    while (ib_list_elements(iter_queue) > 0) {
        rc = ib_list_dequeue(iter_queue, (void *) &state);
        if (rc != IB_OK) {
            return rc;
        }

        /* Default: fail to root unless a longer suffix is found below. */
        state->fail = ac_tree->root;

        if (state->parent != ac_tree->root) {
            /* Follow the parent's failure chain. Every state on it is
             * shallower than the parent and was therefore dequeued (and
             * had its own fail link set) earlier in this pass. */
            candidate = state->parent->fail;
            while (candidate != NULL) {
                goto_state = ib_ac_child_for_code(candidate, state->letter);
                if (goto_state != NULL) {
                    state->fail = goto_state;
                    break;
                }
                if (candidate == ac_tree->root) {
                    /* Root has no transition on this letter either. */
                    break;
                }
                candidate = candidate->fail;
            }
        }

        for (child = state->child;
             child != NULL;
             child = child->sibling)
        {
            rc = ib_list_enqueue(iter_queue, (void *) child);
            if (rc != IB_OK) {
                return rc;
            }
        }
    }

    /* Link common outputs of subpatterns present in the branch */
    ib_ac_link_outputs(ac_tree, ac_tree->root);

    /* Unlink invalid fail transitions. This guarantees that there will
     * be at least one letter with transition in each fail state */
    ib_ac_unlink_unuseful(ac_tree, ac_tree->root);

    if (ac_tree->root->child != NULL) {
        ib_ac_build_bintree(ac_tree, ac_tree->root);
    }

    ac_tree->flags |= IB_AC_FLAG_PARSER_COMPILED;

    return IB_OK;
}
/**
 * Adds a pattern into the trie.
 *
 * Walks the trie from the root one pattern byte at a time, creating a new
 * state for every byte that has no transition yet. Each new state stores
 * a NUL-terminated copy of the pattern prefix that leads to it (copied
 * from @a pattern as given, without case folding). The state reached by
 * the last byte is flagged as an output state and receives @a callback
 * and @a data; the pattern counter is only incremented the first time a
 * state becomes an output. On success the compiled flag is cleared so the
 * tree gets compiled again.
 *
 * @param ac_tree  pointer to the matcher
 * @param pattern  pattern to add
 * @param callback function pointer to call if pattern is found
 * @param data     pointer to pass to the callback if pattern is found
 * @param len      the length of the pattern; when 0, strlen(pattern) is
 *                 used instead
 *
 * @returns IB_OK on success, IB_DECLINED if the matcher is flagged as
 *          ready, IB_EALLOC if a pool allocation fails.
 */
ib_status_t ib_ac_add_pattern(ib_ac_t *ac_tree,
                              const char *pattern,
                              ib_ac_callback_t callback,
                              void *data,
                              size_t len)
{
    ib_ac_state_t *parent = NULL;
    ib_ac_state_t *child = NULL;

    size_t length = 0;
    size_t i = 0;

    if (ac_tree->flags & IB_AC_FLAG_PARSER_READY) {
        return IB_DECLINED;
    }

    length = (len == 0) ? strlen(pattern) : len;
    parent = ac_tree->root;

    for (i = 0; i < length; ++i) {
        ib_ac_char_t letter = pattern[i];

        if (ac_tree->flags & IB_AC_FLAG_PARSER_NOCASE) {
            /* tolower() requires a value representable as unsigned char
             * (or EOF); a negative plain char is undefined behaviour.
             * Convert back through char so the stored letter has the same
             * representation as an unfolded pattern byte. */
            letter = (ib_ac_char_t)(char)tolower((unsigned char)pattern[i]);
        }

        child = ib_ac_child_for_code(parent, letter);
        if (child == NULL) {
            child = ib_mpool_calloc(ac_tree->mp, 1, sizeof(*child));
            if (child == NULL) {
                return IB_EALLOC;
            }

            child->letter = letter;
            child->level = i;

            /* i + 1 prefix bytes plus the terminating NUL. */
            child->pattern = ib_mpool_calloc(ac_tree->mp, 1, i + 2);
            if (child->pattern == NULL) {
                return IB_EALLOC;
            }

            /* Copy the content it should match to reach this state.
             * If the state produces an output, it will be the pattern
             * itself */
            memcpy(child->pattern, pattern, i + 1);
            child->pattern[i + 1] = '\0';
        }

        if (i == length - 1) {
            /* Last byte: this state reports the pattern. */
            if ((child->flags & IB_AC_FLAG_STATE_OUTPUT) == 0) {
                ++ac_tree->pattern_cnt;
                child->flags |= IB_AC_FLAG_STATE_OUTPUT;
            }

            child->callback = callback;
            child->data = data;
        }

        /* Called for existing children as well, as before. */
        ib_ac_add_child(parent, child);
        parent = child;
    }

    /* It needs to be compiled */
    ac_tree->flags &= ~IB_AC_FLAG_PARSER_COMPILED;

    return IB_OK;
}