/*
 * follow_suffix_link
 *
 * Advances pos (in place) across a suffix link.
 *
 *  - At the root: nothing happens.
 *  - Current node has a suffix link and pos sits on the last character of
 *    its edge: jump through the link and place pos on the last character
 *    of the target node's label.
 *  - Otherwise: go through the father's suffix link and walk back down the
 *    part of the incoming edge label that had been matched. If the father
 *    is the root, pos->node is simply set to the root and the caller (SEA)
 *    traces the whole string from there.
 *
 * tree - the suffix tree being built
 * pos  - current position (node + offset on its incoming edge); updated
 */
void follow_suffix_link(SUFFIX_TREE* tree, POS* pos)
{
   /* Matched portion of the edge between the current node and its father;
      only needed when the current node's own link cannot be used */
   PATH      edge_label;
   /* Output slot required by trace_string; the value is not used here */
   DBL_WORD  ignored_count = 0;

   /* Nothing to follow from the root */
   if(pos->node == tree->root)
      return;

   /* Direct case: a link exists and pos is at the very end of the edge */
   if(pos->node->suffix_link != 0 &&
      is_last_char_in_edge(tree, pos->node, pos->edge_pos) != 0)
   {
      pos->node     = pos->node->suffix_link;
      pos->edge_pos = get_node_label_length(tree, pos->node) - 1;
      return;
   }

   /* Indirect case: no link yet, or pos is in the middle of an edge.
      When the father is the root its link is of no use (per the original
      note it is linked to itself), so hand the root back and let SEA
      trace from it like the naive algorithm. */
   if(pos->node->father == tree->root)
   {
      pos->node = tree->root;
      return;
   }

   /* Remember the indices of the incoming edge up to the current offset */
   edge_label.begin = pos->node->edge_label_start;
   edge_label.end   = pos->node->edge_label_start + pos->edge_pos;

   /* Hop through the father's suffix link (the original comment cites
      Ukkonen's lemma for its existence) ... */
   pos->node = pos->node->father->suffix_link;

   /* ... then walk the remembered label back down. The label is expected
      to be present in the tree, which is why the skip mode is requested. */
   pos->node = trace_string(tree, pos->node, edge_label,
                            &(pos->edge_pos), &ignored_count, skip);
}
/*
 * SEA
 *
 * Performs a single extension for the substring str (indices into
 * tree->tree_string), starting at position pos.
 *
 * tree          - the suffix tree being built
 * pos           - current position in the tree; updated in place
 * str           - begin/end indices of the string to extend with
 * rule_applied  - out: set to 3 when the whole string was already present,
 *                 set to 2 when a leaf was added (new son or edge split);
 *                 left untouched when nothing is added
 * after_rule_3  - non-zero means this is the first extension after rule 3,
 *                 in which case no suffix link is followed first
 *
 * Also maintains the non-local `suffixless`: the internal node (if any)
 * still waiting for its suffix link.
 */
void SEA(
      SUFFIX_TREE*   tree,
      POS*           pos,
      PATH           str,
      DBL_WORD*      rule_applied,
      char           after_rule_3)
{
   /* How many characters of str were matched during tracing */
   DBL_WORD matched      = 0;
   /* str.begin as received; str.begin itself may be overwritten below */
   DBL_WORD suffix_start = str.begin;

#ifdef DEBUG
   ST_PrintTree(tree);
   printf("extension: %lu phase+1: %lu",str.begin, str.end);
   if(after_rule_3 != 0)
      printf(" starting at (%lu,%lu | %lu) ", pos->node->edge_label_start, get_node_label_end(tree,pos->node), pos->edge_pos);
   else
      printf(" followed from (%lu,%lu | %lu) ", pos->node->edge_label_start, get_node_label_end(tree,pos->node), pos->edge_pos);
#endif

#ifdef STATISTICS
   ++counter;
#endif

   /* The suffix link is skipped only for the first extension after rule 3 */
   if(after_rule_3 == 0)
      follow_suffix_link(tree, pos);

#if defined(DEBUG) && defined(STATISTICS)
   if(after_rule_3 != 0)
      printf(". counter: %lu\n", counter);
   else
      printf("to (%lu,%lu | %lu). counter: %lu\n", pos->node->edge_label_start, get_node_label_end(tree,pos->node),pos->edge_pos,counter);
#endif

   if(pos->node != tree->root)
   {
      /* Away from the root only the last character still has to be matched */
      str.begin = str.end;
      matched   = 0;

      if(!is_last_char_in_edge(tree, pos->node, pos->edge_pos))
      {
         /* Inside an edge: compare with the next character on the SAME edge */
         if(tree->tree_string[pos->node->edge_label_start+pos->edge_pos+1] == tree->tree_string[str.end])
         {
            pos->edge_pos += 1;
            matched = 1;
         }
      }
      else
      {
         /* At the end of an edge: look for a son whose edge starts with the
            character, i.e. search the NEXT edge */
         NODE* son = find_son(tree, pos->node, tree->tree_string[str.end]);
         if(son != 0)
         {
            pos->node     = son;
            pos->edge_pos = 0;
            matched       = 1;
         }
      }
   }
   else
   {
      /* At the root the whole string is traced, without the skip trick */
      pos->node = trace_string(tree, tree->root, str, &(pos->edge_pos), &matched, no_skip);
   }

   /* Everything was found in the tree already - rule 3 */
   if(matched == str.end - str.begin + 1)
   {
      *rule_applied = 3;
      /* A node still waiting for its suffix link (at most one may exist)
         gets linked to the father of the current position */
      if(suffixless != 0)
      {
         create_suffix_link(suffixless, pos->node->father);
         suffixless = 0;
      }
#ifdef DEBUG
      printf("rule 3 (%lu,%lu)\n",str.begin,str.end);
#endif
      return;
   }

   /* Mismatch strictly inside an edge (and not at the root): rule 2, split */
   if(!is_last_char_in_edge(tree, pos->node, pos->edge_pos) && pos->node != tree->root)
   {
      /* The node created by the split is handed back by apply_extension_rule_2 */
      NODE* split_node = apply_extension_rule_2(pos->node, str.begin+matched, str.end, suffix_start, pos->edge_pos, split);

      if(suffixless != 0)
         create_suffix_link(suffixless, split_node);

      if(get_node_label_length(tree, split_node) == 1 && split_node->father == tree->root)
      {
         /* A son of the root labelled by a single character links to the
            root, so no node is left waiting for a link */
         split_node->suffix_link = tree->root;
         suffixless = 0;
      }
      else
      {
         /* The new node waits for its link until a later extension */
         suffixless = split_node;
      }

      /* Position for the next extension */
      pos->node     = split_node;
      *rule_applied = 2;
      return;
   }

   /* End of an edge (or the root). A node without sons gets nothing added
      here - the "rule 1" alternative named in the original comment. */
   if(pos->node->sons == 0)
      return;

   /* Rule 2, new son: hang a new leaf under the current node */
   apply_extension_rule_2(pos->node, str.begin+matched, str.end, suffix_start, 0, new_son);
   *rule_applied = 2;

   /* A node still waiting for its suffix link is linked to the current node */
   if(suffixless != 0)
   {
      create_suffix_link(suffixless, pos->node);
      suffixless = 0;
   }
}
/*
 * SEA
 *
 * Performs a single extension for the substring str (indices into
 * tree->tree_string), starting at position pos.
 *
 * tree          - the suffix tree being built
 * pos           - current position in the tree; updated in place
 * str           - begin/end indices of the string to extend with
 * rule_applied  - out: set to 3 when the whole string was already present,
 *                 set to 2 when a leaf was added (new son or edge split);
 *                 left untouched when nothing is added
 * after_rule_3  - non-zero means this is the first extension after rule 3,
 *                 in which case no suffix link is followed first
 *
 * Returns 0 on the normal paths and 1 through the `error` label, which is
 * presumably reached by the check() macro when apply_extension_rule_2
 * yields NULL (macro not visible here - confirm).
 *
 * Also maintains the non-local `suffixless`: the internal node (if any)
 * still waiting for its suffix link.
 */
int SEA(
      SuffixTree_T            tree,
      struct SuffixTreePos*   pos,
      struct SuffixTreePath   str,
      SuffixTreeIndex_T*      rule_applied,
      char                    after_rule_3)
{
   /* How many characters of str were matched during tracing */
   SuffixTreeIndex_T matched      = 0;
   /* str.begin as received; str.begin itself may be overwritten below */
   SuffixTreeIndex_T suffix_start = str.begin;
   /* Node returned by find_son / apply_extension_rule_2 */
   Node_T            found        = NULL;

   /* The suffix link is skipped only for the first extension after rule 3 */
   if(after_rule_3 == 0)
      follow_suffix_link(tree, pos);

   if(pos->node != tree->root)
   {
      /* Away from the root only the last character still has to be matched */
      str.begin = str.end;
      matched   = 0;

      if(!is_last_char_in_edge(tree, pos->node, pos->edge_pos))
      {
         /* Inside an edge: compare with the next character on the SAME edge */
         if(tree->tree_string[pos->node->edge_label_start+pos->edge_pos+1] == tree->tree_string[str.end])
         {
            pos->edge_pos += 1;
            matched = 1;
         }
      }
      else
      {
         /* At the end of an edge: look for a son whose edge starts with the
            character, i.e. search the NEXT edge */
         found = find_son(tree, pos->node, tree->tree_string[str.end]);
         if(found != NULL)
         {
            pos->node     = found;
            pos->edge_pos = 0;
            matched       = 1;
         }
      }
   }
   else
   {
      /* At the root the whole string is traced, without the skip trick */
      pos->node = trace_string(tree, tree->root, str, &(pos->edge_pos), &matched, no_skip);
   }

   /* Everything was found in the tree already - rule 3 */
   if(matched == str.end - str.begin + 1)
   {
      *rule_applied = 3;
      /* A node still waiting for its suffix link (at most one may exist)
         gets linked to the father of the current position */
      if(suffixless != NULL)
      {
         create_suffix_link(suffixless, pos->node->father);
         suffixless = NULL;
      }
#ifdef DEBUG
      printf("rule 3 (%zu,%zu)\n",str.begin,str.end);
#endif
      return 0;
   }

   /* Mismatch strictly inside an edge (and not at the root): rule 2, split */
   if(!is_last_char_in_edge(tree, pos->node, pos->edge_pos) && pos->node != tree->root)
   {
      /* The node created by the split is handed back by apply_extension_rule_2 */
      found = apply_extension_rule_2(pos->node, str.begin+matched, str.end, suffix_start, pos->edge_pos, split);
      check(found, "Could not apply extension rule 2.");

      if(suffixless != NULL)
         create_suffix_link(suffixless, found);

      if(get_node_label_length(tree, found) == 1 && found->father == tree->root)
      {
         /* A son of the root labelled by a single character links to the
            root, so no node is left waiting for a link */
         found->suffix_link = tree->root;
         suffixless = NULL;
      }
      else
      {
         /* The new node waits for its link until a later extension */
         suffixless = found;
      }

      /* Position for the next extension */
      pos->node     = found;
      *rule_applied = 2;
      return 0;
   }

   /* End of an edge (or the root). A node without a left son gets nothing
      added here - the "rule 1" alternative named in the original comment. */
   if(pos->node->left_son == NULL)
      return 0;

   /* Rule 2, new son: hang a new leaf under the current node */
   found = apply_extension_rule_2(pos->node, str.begin+matched, str.end, suffix_start, 0, new_son);
   check(found, "Could not apply extension rule 2.");
   *rule_applied = 2;

   /* A node still waiting for its suffix link is linked to the current node */
   if(suffixless != NULL)
   {
      create_suffix_link(suffixless, pos->node);
      suffixless = NULL;
   }
   return 0;

error:
   return 1;
}