/*
 * ST_PrintNode: prints the subtree rooted at node1 to stdout, one node per
 * line, in pre-order.
 *
 *   tree  - tree that owns node1; its tree_string supplies the edge labels.
 *   node1 - root of the subtree to print (must not be null).
 *   depth - nesting level of node1. A node at depth 0 is not printed itself;
 *           only its descendants are.
 *
 * Each printed line is made of (depth - 1) '|' characters, a '+', and the
 * characters of the node's incoming edge label. When DEBUG is defined the
 * label bounds and the node's path position are appended.
 */
void ST_PrintNode(SUFFIX_TREE* tree, NODE* node1, long depth)
{
    NODE* child = node1->sons;
    long  last  = get_node_label_end(tree, node1);
    long  level;
    long  pos;

    if (depth > 0) {
        /* One bar for every ancestor level above the direct parent. */
        for (level = depth; level > 1; level--) {
            printf("|");
        }
        printf("+");

        /* The incoming edge label, character by character. */
        for (pos = node1->edge_label_start; pos <= last; pos++) {
            printf("%c", tree->tree_string[pos]);
        }
#ifdef DEBUG
        printf(" \t\t\t(%lu,%lu | %lu)", node1->edge_label_start, last, node1->path_position);
#endif
        printf("\n");
    }

    /* Recurse into every son, walking the sibling list left to right. */
    for (; child != 0; child = child->right_sibling) {
        ST_PrintNode(tree, child, depth + 1);
    }
}
/*
 * SuffixTree_print_node: prints the subtree rooted at node1 to stdout, one
 * node per line, in pre-order.
 *
 *   tree  - tree that owns node1; its tree_string supplies the edge labels.
 *   node1 - root of the subtree to print (must not be null).
 *   depth - nesting level of node1. A node at depth 0 is not printed itself;
 *           only its descendants are.
 *
 * Each printed line is made of (depth - 1) '|' characters, a '+', the
 * characters of the node's incoming edge label, and then five tab-separated
 * columns: index, label start, label end, path_position and edge_depth.
 */
void SuffixTree_print_node(SuffixTree_T tree, Node_T node1, long depth)
{
    Node_T child = node1->left_son;
    long   first = node1->edge_label_start;
    long   last  = get_node_label_end(tree, node1);
    long   level;
    long   pos;

    if (depth > 0) {
        /* One bar for every ancestor level above the direct parent. */
        for (level = depth; level > 1; level--) {
            printf("|");
        }
        printf("+");

        /* The incoming edge label, character by character. */
        for (pos = first; pos <= last; pos++) {
            printf("%c", tree->tree_string[pos]);
        }

        printf("\t%zu\t%ld\t%ld\t%ld\t%ld", node1->index, first, last, node1->path_position, node1->edge_depth);
        printf("\n");
    }

    /* Recurse into every son, walking the sibling list left to right. */
    for (; child != 0; child = child->right_sibling) {
        SuffixTree_print_node(tree, child, depth + 1);
    }
}
/*
 * ST_FindSubstring: searches the suffix tree for the string W.
 *
 *   tree - the suffix tree to search.
 *   W    - the substring to look for; at least P characters must be readable.
 *   P    - the length of W.
 *
 * Returns the path_position of the node on whose incoming edge the match
 * was completed, or ST_ERROR when W does not occur in the tree.
 *
 * A null W or a zero-length query (P == 0) yields ST_ERROR. Without this
 * guard the function would read W[0], which lies outside a zero-length
 * buffer, and could report a match for that stray character.
 */
DBL_WORD ST_FindSubstring(
                      /* The suffix tree */
                      SUFFIX_TREE*    tree,
                      /* The substring to find */
                      char*           W,
                      /* The length of W */
                      DBL_WORD        P)
{
    NODE*    node;
    DBL_WORD k, j = 0, node_label_end;

    /* Nothing to search for: never touch W[0]. */
    if (W == 0 || P == 0) {
        return ST_ERROR;
    }

    /* Start with the root's son whose incoming edge begins with W[0]. */
    node = find_son(tree, tree->root, W[0]);

    /* Walk down from the root until W is exhausted, a mismatch is found,
       or there is no edge to continue on. */
    while (node != 0) {
        k = node->edge_label_start;
        node_label_end = get_node_label_end(tree, node);

        /* Scan a single edge, comparing it with W character by character. */
        while (j < P && k <= node_label_end && tree->tree_string[k] == W[j]) {
            j++;
            k++;
#ifdef STATISTICS
            counter++;
#endif
        }

        if (j == P) {
            /* All of W was matched - it is a substring. */
            return node->path_position;
        } else if (k > node_label_end) {
            /* The whole edge matched; continue on the edge starting with
               the next unmatched character of W (j < P holds here). */
            node = find_son(tree, node, W[j]);
        } else {
            /* A mismatch in the middle of an edge - W is not a substring. */
            return ST_ERROR;
        }
    }

    return ST_ERROR;
}
/*
 * ST_PrintFullNode: prints to stdout the concatenation of the edge labels
 * on the path that leads down to `node`, ancestors first.
 *
 *   tree - tree that owns node; its tree_string supplies the characters.
 *   node - the node whose path label is printed; a null node prints nothing.
 *
 * The function recurses into node->father until it reaches a node whose
 * father is the root, so the root's own label is not printed for ordinary
 * (non-root) nodes.
 */
void ST_PrintFullNode(SUFFIX_TREE* tree, NODE* node)
{
    long pos;
    long last;

    if (node == NULL) {
        return;
    }

    /* Bounds of the edge that enters this node. */
    pos  = node->edge_label_start;
    last = get_node_label_end(tree, node);

    /* Emit everything above this node first; stop below the root. */
    if (node->father != tree->root) {
        ST_PrintFullNode(tree, node->father);
    }

    /* Finally emit this node's own edge label. */
    for (; pos <= last; pos++) {
        printf("%c", tree->tree_string[pos]);
    }
}
/*
 * SuffixTree_find_substring: searches the suffix tree for `query`.
 *
 *   tree         - the suffix tree to search.
 *   query        - the substring to look for; at least query_length
 *                  characters must be readable.
 *   query_length - the number of characters in query.
 *
 * Returns the path_position of the node on whose incoming edge the match
 * was completed, or (SuffixTreeIndex_T)-1 when query does not occur.
 *
 * A null query or a zero-length query yields (SuffixTreeIndex_T)-1.
 * Without this guard the function would read query[0], which lies outside
 * a zero-length buffer, and could report a match for that stray character.
 */
SuffixTreeIndex_T SuffixTree_find_substring(const SuffixTree_T tree, char* query, SuffixTreeIndex_T query_length)
{
    Node_T            node;
    SuffixTreeIndex_T k, j = 0, node_label_end;

    /* Nothing to search for: never touch query[0]. */
    if (query == NULL || query_length == 0) {
        return (SuffixTreeIndex_T)-1;
    }

    /* Start with the root's son whose incoming edge begins with query[0]. */
    node = find_son(tree, tree->root, query[0]);

    /* Walk down from the root until the query is exhausted, a mismatch is
       found, or there is no edge to continue on. */
    while (node != NULL) {
        k = node->edge_label_start;
        node_label_end = get_node_label_end(tree, node);

        /* Scan a single edge, comparing it with the query. */
        while (j < query_length && k <= node_label_end && tree->tree_string[k] == query[j]) {
            j++;
            k++;
        }

        if (j == query_length) {
            /* The whole query was matched - it is a substring. */
            return node->path_position;
        } else if (k > node_label_end) {
            /* The whole edge matched; continue on the edge starting with
               the next unmatched query character (j < query_length here). */
            node = find_son(tree, node, query[j]);
        } else {
            /* A mismatch in the middle of an edge - not a substring. */
            return (SuffixTreeIndex_T)-1;
        }
    }

    return (SuffixTreeIndex_T)-1;
}
/*
 * SEA: performs one extension step of the suffix tree construction
 * (NOTE(review): the name presumably stands for "Single Extension
 * Algorithm" of Ukkonen's construction - confirm against the project docs).
 *
 *   tree          - the tree being built.
 *   pos           - in/out: current position in the tree (node plus
 *                   edge_pos offset on its incoming edge). It is advanced
 *                   to the position reached by this extension.
 *   str           - the [begin, end] index range in tree->tree_string that
 *                   this extension handles. Passed by value; the local
 *                   copy's begin is overwritten when only the last
 *                   character has to be traced.
 *   rule_applied  - out: set to 3 when the string is already in the tree,
 *                   or to 2 when a new leaf (and possibly a new internal
 *                   node) was added. It is left untouched when the walk
 *                   ends at the end of an edge of a node that has no sons.
 *   after_rule_3  - non-zero when this is the first extension after rule 3
 *                   was applied; the suffix link is then not followed.
 *
 * Uses the file-scope variable `suffixless` (the internal node still
 * waiting for a suffix link, or 0) and, when STATISTICS is defined, the
 * file-scope `counter`.
 */
void SEA(
    SUFFIX_TREE* tree,
    POS* pos,
    PATH str,
    DBL_WORD* rule_applied,
    char after_rule_3)
{
    /* path_pos remembers the original str.begin, because str.begin may be
       overwritten below but the new leaf still needs the original value. */
    DBL_WORD chars_found = 0 , path_pos = str.begin;
    NODE* tmp;

#ifdef DEBUG
    ST_PrintTree(tree);
    printf("extension: %lu phase+1: %lu",str.begin, str.end);
    if(after_rule_3 == 0)
        printf(" followed from (%lu,%lu | %lu) ", pos->node->edge_label_start, get_node_label_end(tree,pos->node), pos->edge_pos);
    else
        printf(" starting at (%lu,%lu | %lu) ", pos->node->edge_label_start, get_node_label_end(tree,pos->node), pos->edge_pos);
#endif

#ifdef STATISTICS
    counter++;
#endif

    /* Follow the suffix link only if this is not the first extension after
       rule 3 was applied. */
    if(after_rule_3 == 0)
        follow_suffix_link(tree, pos);

#ifdef DEBUG
#ifdef STATISTICS
    if(after_rule_3 == 0)
        printf("to (%lu,%lu | %lu). counter: %lu\n", pos->node->edge_label_start, get_node_label_end(tree,pos->node),pos->edge_pos,counter);
    else
        printf(". counter: %lu\n", counter);
#endif
#endif

    /* If the current node is the root, trace the whole string starting
       from the root; otherwise trace the last character only. */
    if(pos->node == tree->root)
    {
        pos->node = trace_string(tree, tree->root, str, &(pos->edge_pos), &chars_found, no_skip);
    }
    else
    {
        /* From here on str covers a single character, so the "whole string
           found" test further down reduces to chars_found == 1. */
        str.begin = str.end;
        chars_found = 0;

        /* Case 1: the last matched character is the last one of its edge. */
        if(is_last_char_in_edge(tree,pos->node,pos->edge_pos))
        {
            /* Look for the last symbol of str on the NEXT edge (a son). */
            tmp = find_son(tree, pos->node, tree->tree_string[str.end]);
            if(tmp != 0)
            {
                pos->node = tmp;
                pos->edge_pos = 0;
                chars_found = 1;
            }
        }
        /* Case 2: the last matched character is NOT the last of its edge. */
        else
        {
            /* Compare the last symbol of str with the next character on
               the CURRENT edge. */
            if(tree->tree_string[pos->node->edge_label_start+pos->edge_pos+1] == tree->tree_string[str.end])
            {
                pos->edge_pos++;
                chars_found = 1;
            }
        }
    }

    /* If the whole string was found - rule 3 applies. */
    if(chars_found == str.end - str.begin + 1)
    {
        *rule_applied = 3;
        /* If there is an internal node that has no suffix link yet (only
           one may exist), link it to the father of the current position's
           node. */
        if(suffixless != 0)
        {
            create_suffix_link(suffixless, pos->node->father);
            /* No internal node is waiting for a suffix link any more. */
            suffixless = 0;
        }

#ifdef DEBUG
        printf("rule 3 (%lu,%lu)\n",str.begin,str.end);
#endif
        return;
    }

    /* If the last character found is the last one of an edge (or we are at
       the root), the new character goes on a new edge below this node. */
    if(is_last_char_in_edge(tree,pos->node,pos->edge_pos) || pos->node == tree->root)
    {
        /* Decide whether to apply rule 2 (new_son) or rule 1. When the node
           has no sons nothing is done here and *rule_applied is not
           written. */
        if(pos->node->sons != 0)
        {
            /* Rule 2, new son: apply_extension_rule_2 creates a new leaf;
               its return value is not needed here. */
            apply_extension_rule_2(pos->node, str.begin+chars_found, str.end, path_pos, 0, new_son);
            *rule_applied = 2;
            /* If there is an internal node that has no suffix link yet
               (only one may exist), link it to the current position's node
               itself (pos->node). */
            if(suffixless != 0)
            {
                create_suffix_link(suffixless, pos->node);
                /* No internal node is waiting for a suffix link any more. */
                suffixless = 0;
            }
        }
    }
    else
    {
        /* Rule 2, split: apply_extension_rule_2 splits the current edge at
           pos->edge_pos and returns the node it created. */
        tmp = apply_extension_rule_2(pos->node, str.begin+chars_found, str.end, path_pos, pos->edge_pos, split);
        /* A node still waiting for a suffix link is linked to the node
           created by the split. */
        if(suffixless != 0)
            create_suffix_link(suffixless, tmp);
        /* A son of the root with a single-character label links directly
           to the root. */
        if(get_node_label_length(tree,tmp) == 1 && tmp->father == tree->root)
        {
            tmp->suffix_link = tree->root;
            /* No internal node is waiting for a suffix link any more. */
            suffixless = 0;
        }
        else
            /* Mark tmp as waiting for a suffix link. */
            suffixless = tmp;

        /* Prepare pos for the next extension. */
        pos->node = tmp;
        *rule_applied = 2;
    }
}
/*
 * get_node_label_length: returns the number of characters on the edge that
 * enters `node`.
 *
 *   tree - tree that owns node (needed to resolve the label's end index).
 *   node - the node whose incoming edge is measured.
 *
 * The label occupies the inclusive index range
 * [node->edge_label_start, get_node_label_end(tree, node)], hence the +1.
 */
DBL_WORD get_node_label_length(SUFFIX_TREE* tree, NODE* node)
{
    return (get_node_label_end(tree, node) - node->edge_label_start) + 1;
}
/*
 * Node_is_leaf: returns 1 when the label of `node` ends at tree->e, and 0
 * otherwise.
 *
 *   node - the node to test.
 *   tree - tree that owns node.
 *
 * NOTE(review): tree->e is presumably the shared end index used by every
 * leaf edge during construction - confirm against the tree definition.
 */
int Node_is_leaf(Node_T node, SuffixTree_T tree)
{
    if (get_node_label_end(tree, node) == tree->e) {
        return 1;
    }
    return 0;
}
/*
 * Node_get_incoming_edge_length: returns the number of characters on the
 * edge that enters `node`.
 *
 *   node - the node whose incoming edge is measured.
 *   tree - tree that owns node (needed to resolve the label's end index).
 *
 * The label occupies the inclusive index range
 * [node->edge_label_start, get_node_label_end(tree, node)], hence the +1.
 */
SuffixTreeIndex_T Node_get_incoming_edge_length(Node_T node, SuffixTree_T tree)
{
    return (get_node_label_end(tree, node) - node->edge_label_start) + 1;
}
/*
 * get_node_label_length: returns the number of characters on the edge that
 * enters `node`.
 *
 *   tree - tree that owns node (needed to resolve the label's end index).
 *   node - the node whose incoming edge is measured.
 *
 * The label occupies the inclusive index range
 * [node->edge_label_start, get_node_label_end(tree, node)], hence the +1.
 */
SuffixTreeIndex_T get_node_label_length(SuffixTree_T tree, Node_T node)
{
    return (get_node_label_end(tree, node) - node->edge_label_start) + 1;
}