/*
 * Demo driver: builds a suffix tree over "mississippi", prints it, looks up
 * the substring "ssis" and prints the position reported by ST_FindSubstring,
 * then releases the tree.
 *
 * Returns 0 on success, 1 if the tree could not be created.
 */
int main(void)
{
    /* Will hold the position of the substring if it exists in the tree. */
    DBL_WORD position;

    /* Create the suffix tree. */
    SUFFIX_TREE* tree = ST_CreateTree("mississippi", 11);

    /* Guard against a failed creation before handing the pointer on.
       NOTE(review): assumes ST_CreateTree signals failure with a null
       pointer - confirm against its definition. */
    if (tree == 0) {
        fprintf(stderr, "Failed to create the suffix tree.\n");
        return 1;
    }

    /* Print the suffix tree. */
    ST_PrintTree(tree);

    /* Search for a substring in the tree and get its position if it exists. */
    position = ST_FindSubstring(tree, "ssis", 4);

    /* Print the position of the substring. DBL_WORD values are printed with
       %lu elsewhere in this file; the cast keeps the argument type in exact
       agreement with the conversion specifier. */
    printf("\nPosition of ssis in mississippi is %lu.\n\n", (unsigned long)position);

    /* Delete the tree and all its nodes. */
    ST_DeleteTree(tree);

    return 0;
}
/*
 * SEA - carry out one extension step on the tree for the path `str`.
 *
 * tree          Suffix tree being worked on; its root and tree_string are read.
 * pos           In/out: current position, made of a node and an offset
 *               (edge_pos) into that node's edge label. Updated as the step
 *               proceeds.
 * str           Path (begin, end) indexing tree->tree_string. Passed by value,
 *               so the local change to str.begin is not seen by the caller.
 * rule_applied  Out: set to 3 when the traced path is already present, to 2
 *               when a new son was added (directly or after splitting an
 *               edge). Left untouched when the position is at the end of an
 *               edge whose node has no sons (the "rule 1" case).
 * after_rule_3  Zero: follow the suffix link from pos before tracing.
 *               Non-zero: start from pos exactly as given.
 *
 * Side effects outside the arguments: reads and updates the global
 * `suffixless` (the internal node still waiting for a suffix link, or 0) and,
 * when STATISTICS is defined, increments the global `counter`.
 */
void SEA(
    SUFFIX_TREE* tree,
    POS*         pos,
    PATH         str,
    DBL_WORD*    rule_applied,
    char         after_rule_3)
{
    DBL_WORD  matched = 0;
    DBL_WORD  suffix_start = str.begin;   /* remembered before str.begin is narrowed */
    NODE*     node;
    const int use_suffix_link = (after_rule_3 == 0);

#ifdef DEBUG
    ST_PrintTree(tree);
    printf("extension: %lu phase+1: %lu",str.begin, str.end);
    if (use_suffix_link) {
        printf(" followed from (%lu,%lu | %lu) ",
               pos->node->edge_label_start,
               get_node_label_end(tree,pos->node),
               pos->edge_pos);
    } else {
        printf(" starting at (%lu,%lu | %lu) ",
               pos->node->edge_label_start,
               get_node_label_end(tree,pos->node),
               pos->edge_pos);
    }
#endif

#ifdef STATISTICS
    counter++;
#endif

    /* The suffix link is skipped on the first extension after rule 3. */
    if (use_suffix_link) {
        follow_suffix_link(tree, pos);
    }

#ifdef DEBUG
#ifdef STATISTICS
    if (use_suffix_link) {
        printf("to (%lu,%lu | %lu). counter: %lu\n",
               pos->node->edge_label_start,
               get_node_label_end(tree,pos->node),
               pos->edge_pos,
               counter);
    } else {
        printf(". counter: %lu\n", counter);
    }
#endif
#endif

    if (pos->node != tree->root) {
        /* Away from the root only the last symbol of str has to be traced. */
        str.begin = str.end;
        matched = 0;

        if (!is_last_char_in_edge(tree, pos->node, pos->edge_pos)) {
            /* Position is inside an edge: compare with the next symbol of
               the CURRENT edge. */
            if (tree->tree_string[pos->node->edge_label_start + pos->edge_pos + 1]
                    == tree->tree_string[str.end]) {
                pos->edge_pos++;
                matched = 1;
            }
        } else {
            /* Position is at the end of an edge: look for a son whose edge
               starts with the last symbol, i.e. search the NEXT edge. */
            node = find_son(tree, pos->node, tree->tree_string[str.end]);
            if (node != 0) {
                pos->node = node;
                pos->edge_pos = 0;
                matched = 1;
            }
        }
    } else {
        /* At the root the whole of str is traced from scratch. */
        pos->node = trace_string(tree, tree->root, str, &(pos->edge_pos), &matched, no_skip);
    }

    /* Everything was found: rule 3, nothing to insert. */
    if (matched == str.end - str.begin + 1) {
        *rule_applied = 3;

        /* A pending internal node (at most one) gets its suffix link to the
           father of the current position's node. */
        if (suffixless != 0) {
            create_suffix_link(suffixless, pos->node->father);
            suffixless = 0;
        }
#ifdef DEBUG
        printf("rule 3 (%lu,%lu)\n",str.begin,str.end);
#endif
        return;
    }

    /* Mismatch in the middle of an edge (and not at the root): rule 2 with a
       split, which hands back the newly created node. */
    if (!is_last_char_in_edge(tree, pos->node, pos->edge_pos) && pos->node != tree->root) {
        node = apply_extension_rule_2(pos->node, str.begin + matched, str.end,
                                      suffix_start, pos->edge_pos, split);

        if (suffixless != 0) {
            create_suffix_link(suffixless, node);
        }

        if (get_node_label_length(tree, node) == 1 && node->father == tree->root) {
            /* A son of the root labelled with a single character links
               straight to the root; no node is left waiting for a link. */
            node->suffix_link = tree->root;
            suffixless = 0;
        } else {
            /* The new node now waits for its own suffix link. */
            suffixless = node;
        }

        /* Prepare pos for the next extension. */
        pos->node = node;
        *rule_applied = 2;
        return;
    }

    /* End of an edge (or the root) with no sons: the rule 1 case, nothing is
       done here. */
    if (pos->node->sons == 0) {
        return;
    }

    /* End of an edge (or the root) with sons: rule 2, hang a new son there. */
    apply_extension_rule_2(pos->node, str.begin + matched, str.end, suffix_start, 0, new_son);
    *rule_applied = 2;

    /* A pending internal node (at most one) gets its suffix link to the
       current position's node. */
    if (suffixless != 0) {
        create_suffix_link(suffixless, pos->node);
        suffixless = 0;
    }
}