/*
 * SEA - perform one extension step on the suffix tree.
 *
 * Parameters:
 *   tree         - tree being extended; its root and tree_string are read.
 *   pos          - in/out: current position, i.e. a node plus an offset
 *                  (edge_pos) into that node's edge label. Updated to where
 *                  this extension stopped.
 *   str          - path (begin..end indices into tree->tree_string) to be
 *                  extended. Passed by value, so narrowing str.begin below
 *                  is invisible to the caller.
 *   rule_applied - out: set to 3 when the whole string was already in the
 *                  tree, or to 2 when a new leaf was added or an edge was
 *                  split. Not written when the position is at the end of an
 *                  edge (or at the root) and the node has no left_son.
 *   after_rule_3 - 0 means "follow the suffix link first"; any other value
 *                  skips that step.
 *
 * Returns 0 on success and 1 when control reaches the `error` label.
 * NOTE(review): `check` is assumed to jump to `error` when its first
 * argument is false - confirm against the macro's definition.
 *
 * Side effects: reads and updates `suffixless`, which is defined outside
 * this function and holds the internal node still waiting for a suffix link.
 */
int SEA(SuffixTree_T tree, struct SuffixTreePos* pos, struct SuffixTreePath str, SuffixTreeIndex_T* rule_applied, char after_rule_3)
{
    SuffixTreeIndex_T matched = 0;
    SuffixTreeIndex_T suffix_start = str.begin;
    Node_T candidate = NULL;

    /* The suffix link is not followed on the first extension after rule 3 */
    if(after_rule_3 == 0)
    {
        follow_suffix_link(tree, pos);
    }

    if(pos->node != tree->root)
    {
        /* Away from the root only the last character remains to be traced */
        str.begin = str.end;
        matched = 0;

        if(is_last_char_in_edge(tree, pos->node, pos->edge_pos))
        {
            /* Position sits at the end of its edge: look in the NEXT edge */
            candidate = find_son(tree, pos->node, tree->tree_string[str.end]);
            if(candidate != NULL)
            {
                pos->node = candidate;
                pos->edge_pos = 0;
                matched = 1;
            }
        }
        else if(tree->tree_string[pos->node->edge_label_start + pos->edge_pos + 1] == tree->tree_string[str.end])
        {
            /* Position is inside its edge: the next character of the
               CURRENT edge matches the last character of str */
            pos->edge_pos++;
            matched = 1;
        }
    }
    else
    {
        /* At the root the whole string is traced from the top */
        pos->node = trace_string(tree, tree->root, str, &(pos->edge_pos), &matched, no_skip);
    }

    /* Everything was found in the tree already: rule 3 */
    if(matched == str.end - str.begin + 1)
    {
        *rule_applied = 3;

        /* A node still waiting for its suffix link (at most one exists) is
           linked to the father of the current position */
        if(suffixless != NULL)
        {
            create_suffix_link(suffixless, pos->node->father);
            suffixless = NULL;
        }

#ifdef DEBUG
        printf("rule 3 (%zu,%zu)\n",str.begin,str.end);
#endif
        return 0;
    }

    /* Matching stopped in the middle of an edge (and not at the root):
       rule 2 with a split */
    if(!is_last_char_in_edge(tree, pos->node, pos->edge_pos) && pos->node != tree->root)
    {
        /* apply_extension_rule_2 returns the newly created internal node */
        candidate = apply_extension_rule_2(pos->node, str.begin + matched, str.end, suffix_start, pos->edge_pos, split);
        check(candidate, "Could not apply extension rule 2.");

        if(suffixless != NULL)
        {
            create_suffix_link(suffixless, candidate);
        }

        if(get_node_label_length(tree, candidate) == 1 && candidate->father == tree->root)
        {
            /* A single-character son of the root links straight back to the
               root, so nothing is left waiting for a link */
            candidate->suffix_link = tree->root;
            suffixless = NULL;
        }
        else
        {
            /* The new node waits for its link until a later extension */
            suffixless = candidate;
        }

        /* The next extension starts from the new node */
        pos->node = candidate;
        *rule_applied = 2;
        return 0;
    }

    /* Matching stopped at the end of an edge or at the root. A node without
       sons gets no explicit change here (the rule 1 case). */
    if(pos->node->left_son == NULL)
    {
        return 0;
    }

    /* Rule 2, new son: apply_extension_rule_2 returns the new leaf */
    candidate = apply_extension_rule_2(pos->node, str.begin + matched, str.end, suffix_start, 0, new_son);
    check(candidate, "Could not apply extension rule 2.");
    *rule_applied = 2;

    /* A node still waiting for its suffix link is linked to the current node */
    if(suffixless != NULL)
    {
        create_suffix_link(suffixless, pos->node);
        suffixless = NULL;
    }

    return 0;

error:
    return 1;
}
/*
 * is_last_char_in_edge - test whether edge_pos is the final offset of the
 * edge label belonging to node.
 *
 * Returns 1 when edge_pos equals get_node_label_length(tree, node) - 1,
 * and 0 otherwise.
 */
char is_last_char_in_edge(SUFFIX_TREE* tree, NODE* node, DBL_WORD edge_pos)
{
    return (char)(get_node_label_length(tree, node) - 1 == edge_pos);
}
Node_T trace_single_edge(SuffixTree_T tree, Node_T node, struct SuffixTreePath str, SuffixTreeIndex_T* edge_pos, SuffixTreeIndex_T* chars_found, Skip_T type, int* search_done) { Node_T cont_node; SuffixTreeIndex_T length, str_len; /* Set default return values */ *search_done = 1; *edge_pos = 0; /* Search for the first character of the string in the outcoming edge of node */ cont_node = find_son(tree, node, tree->tree_string[str.begin]); if(cont_node == NULL) { /* Search is done, string not found */ *edge_pos = get_node_label_length(tree,node)-1; *chars_found = 0; return node; } /* Found first character - prepare for continuing the search */ node = cont_node; length = get_node_label_length(tree,node); str_len = str.end - str.begin + 1; /* Compare edge length and string length. */ /* If edge is shorter then the string being searched and skipping is enabled - skip edge */ if(type == skip) { if(length <= str_len) { (*chars_found) = length; (*edge_pos) = length-1; if(length < str_len) *search_done = 0; } else { (*chars_found) = str_len; (*edge_pos) = str_len-1; } return node; } else { /* Find minimum out of edge length and string length, and scan it */ if(str_len < length) length = str_len; for(*edge_pos=1, *chars_found=1; *edge_pos<length; (*chars_found)++,(*edge_pos)++) { /* Compare current characters of the string and the edge. If equal - continue */ if(tree->tree_string[node->edge_label_start+*edge_pos] != tree->tree_string[str.begin+*edge_pos]) { (*edge_pos)--; return node; } } } /* The loop has advanced *edge_pos one too much */ (*edge_pos)--; if((*chars_found) < str_len) /* Search is not done yet */ *search_done = 0; return node; }