/*
 * int_stree_disc_from_parent
 *
 * Unlink a child from its parent's list of children.
 * NOTE:  Only the pointer manipulation is performed here;
 *        int_stree_disconnect is the real disconnection function.
 *
 * Parameters:  tree    -  A suffix tree
 *              parent  -  The parent node
 *              child   -  The child to be unlinked
 *
 * Return:  nothing.  If child is not found among parent's children,
 *          nothing is modified.
 */
void int_stree_disc_from_parent(SUFFIX_TREE tree, STREE_NODE parent,
                                STREE_NODE child)
{
  STREE_NODE cur, prev, succ;

  /* Locate child, remembering its predecessor in the sibling list. */
  prev = NULL;
  for (cur = stree_get_children(tree, parent);
       cur != NULL && cur != child;
       cur = stree_get_next(tree, cur))
    prev = cur;

  if (cur == NULL)
    return;

  succ = stree_get_next(tree, cur);

  if (prev != NULL) {
    if (succ != NULL)
      prev->next = succ;
    else {
      /* prev becomes the last sibling: its next field refers to parent. */
      prev->next = parent;
      prev->nextisparent = 1;
    }
    return;
  }

  /*
   * child was the first entry.  When the parent carries intleaves, the
   * head of the child list is kept in the first intleaf's nextchild field.
   */
  if (int_stree_has_intleaves(tree, parent))
    ((STREE_INTLEAF) (parent->children))->nextchild = succ;
  else
    parent->children = succ;
}
/*
 * int_stree_delete_subtree
 *
 * Release the storage held by the subtree rooted at node: every child
 * subtree (recursively), any intleaf records attached to node, and
 * finally node itself.  The tree's identifiers are marked dirty.
 *
 * Parameters:  tree  -  a suffix tree
 *              node  -  a tree node
 *
 * Return:  nothing.
 */
void int_stree_delete_subtree(SUFFIX_TREE tree, STREE_NODE node)
{
  STREE_NODE kid, sibling;
  STREE_INTLEAF il, il_next;

  if (int_stree_isaleaf(tree, node)) {
    int_stree_free_leaf(tree, (STREE_LEAF) node);
    tree->idents_dirty = 1;
    return;
  }

  /* Fetch each sibling link before the child it belongs to is freed. */
  for (kid = stree_get_children(tree, node); kid != NULL; kid = sibling) {
    sibling = stree_get_next(tree, kid);
    int_stree_delete_subtree(tree, kid);
  }

  if (int_stree_has_intleaves(tree, node)) {
    for (il = (STREE_INTLEAF) node->children; il != NULL; il = il_next) {
      il_next = il->next;
      int_stree_free_intleaf(tree, il);
    }
  }

  int_stree_free_node(tree, node);
  tree->idents_dirty = 1;
}
/*
 * int_stree_edge_merge
 *
 * When a node has no "leaves" and only one child, this function will
 * remove that node and merge the edges from parent to node and node
 * to child into a single edge from parent to child.
 *
 * Nothing is done if node is the root, is a leaf, has intleaves, has
 * no children at all, or has more than one child.
 *
 * Parameters:  tree  -  A suffix tree
 *              node  -  The tree node to be removed
 *
 * Return:  nothing.
 */
void int_stree_edge_merge(SUFFIX_TREE tree, STREE_NODE node)
{
  int len;
  STREE_NODE parent, child;
  STREE_LEAF leaf;

  if (node == stree_get_root(tree) || int_stree_isaleaf(tree, node) ||
      int_stree_has_intleaves(tree, node))
    return;

  parent = stree_get_parent(tree, node);
  child = stree_get_children(tree, node);

  /*
   * A node without children has nothing to merge with; bail out rather
   * than hand a NULL child to the accessors used below.
   */
  if (child == NULL)
    return;

  if (stree_get_next(tree, child) != NULL)
    return;

  /* Extend the child's edge backwards by the length of node's edge. */
  len = stree_get_edgelen(tree, node);
  if (int_stree_isaleaf(tree, child)) {
    leaf = (STREE_LEAF) child;
    leaf->pos -= len;
    leaf->ch = stree_get_mapch(tree, node);
  }
  else {
    child->edgestr -= len;
    child->edgelen += len;
  }

  /* Put child in node's place under parent, then discard node. */
  int_stree_reconnect(tree, parent, node, child);
  tree->num_nodes--;
  tree->idents_dirty = 1;

  int_stree_free_node(tree, node);
}
/*
 * int_stree_reconnect
 *
 * Substitute newchild for oldchild in parent's list of children.
 * newchild takes over oldchild's next link and nextisparent flag, and
 * whichever link referred to oldchild (the predecessor's next field or
 * the head of the parent's child list) is redirected to newchild.
 *
 * Parameters:  tree      -  A suffix tree
 *              parent    -  The parent of the node being replaced
 *              oldchild  -  The child being replaced
 *              newchild  -  The new child
 *
 * Returns:  1 if oldchild was found and replaced, 0 if oldchild is not
 *           among parent's children (in which case nothing is modified).
 */
int int_stree_reconnect(SUFFIX_TREE tree, STREE_NODE parent,
                        STREE_NODE oldchild, STREE_NODE newchild)
{
  STREE_NODE cur, prev;

  /* Search the sibling list for oldchild, tracking its predecessor. */
  prev = NULL;
  cur = stree_get_children(tree, parent);
  while (cur != NULL) {
    if (cur == oldchild)
      break;
    prev = cur;
    cur = stree_get_next(tree, cur);
  }
  if (cur == NULL)
    return 0;

  newchild->next = oldchild->next;
  newchild->nextisparent = oldchild->nextisparent;

  if (prev != NULL)
    prev->next = newchild;
  else if (int_stree_has_intleaves(tree, parent))
    ((STREE_INTLEAF) (parent->children))->nextchild = newchild;
  else
    parent->children = newchild;

  tree->idents_dirty = 1;
  return 1;
}
/*
 * compute_A
 *
 * Computes the A values for the LCA preprocessing.
 * For each node, the bit at position h(I[id]) is added to the mask
 * inherited from the node's ancestors, the result is stored in A[id],
 * and the same mask is passed down to every child.
 *
 * Parameters:  lca    -  an LCA_STRUCT structure
 *              tree   -  a suffix tree
 *              node   -  a suffix tree node
 *              Amask  -  the bits set by the ancestors of node
 *
 * Returns:  nothing.
 */
static void compute_A(LCA_STRUCT *lca, SUFFIX_TREE tree, STREE_NODE node,
                      unsigned int Amask)
{
  // Shift idents so that they go from 1..num_nodes.
  unsigned int id = (unsigned int)stree_get_ident(tree, node) + 1;

  // Shift an unsigned 1: shifting a signed int into its sign bit is
  // undefined behavior, and the mask being built is unsigned anyway.
  Amask |= 1u << h(lca->I[id]);
  lca->A[id] = Amask;

  for (STREE_NODE child = stree_get_children(tree, node); child;
       child = stree_get_next(tree, child)) {
    compute_A(lca, tree, child, Amask);
    IF_STATS(lca->num_prep++);
  }
}
/*
 * stree_get_num_children
 *
 * Count the children of a node by walking its child list.
 *
 * Parameters:  tree  -  a suffix tree
 *              node  -  a tree node
 *
 * Returns:  the number of children.
 */
int stree_get_num_children(SUFFIX_TREE tree, STREE_NODE node)
{
  int total = 0;
  STREE_NODE cur;

  for (cur = stree_get_children(tree, node); cur != NULL;
       cur = stree_get_next(tree, cur))
    total++;

  return total;
}
/*
 * stree_traverse_subtree
 *
 * Walk the subtree rooted at root without recursion.  preorder_fn, if
 * not NULL, is called as (*preorder_fn)(tree, node) when a node is first
 * reached; postorder_fn, if not NULL, is called the same way once all of
 * the node's children have been processed.  The values returned by the
 * callbacks are ignored.
 *
 * Parameters:  tree          -  a suffix tree
 *              root          -  the root of the subtree to traverse
 *              preorder_fn   -  function called before a node's children
 *              postorder_fn  -  function called after a node's children
 *
 * Returns:  nothing.
 */
void stree_traverse_subtree(SUFFIX_TREE tree, STREE_NODE root,
                            int (*preorder_fn)(), int (*postorder_fn)())
{
  STREE_NODE cur, below, sibling;

  cur = root;
  for (;;) {
    /*
     * Descend: visit the current node in preorder, then keep moving to
     * its first child until a node without children is reached.
     */
    for (;;) {
      if (preorder_fn != NULL)
        (*preorder_fn)(tree, cur);

      below = stree_get_children(tree, cur);
      if (below == NULL)
        break;
      cur = below;
    }

    /*
     * Ascend: finish the current node in postorder.  If it has a next
     * sibling, resume the descent there (reached directly through the
     * next link rather than via the parent); otherwise climb to the
     * parent and finish it too.  The walk ends once root is finished.
     */
    for (;;) {
      if (postorder_fn != NULL)
        (*postorder_fn)(tree, cur);

      if (cur == root)
        return;

      sibling = stree_get_next(tree, cur);
      if (sibling != NULL)
        break;

      cur = stree_get_parent(tree, cur);
    }

    cur = sibling;
  }
}
/*
 * int_stree_connect
 *
 * Insert newchild into parent's list of children.  The list is kept in
 * increasing order of the children's mapped characters (as reported by
 * stree_get_mapch), and newchild is linked in at its sorted position.
 *
 * Parameters:  tree      -  A suffix tree
 *              parent    -  The node to get the new child.
 *              newchild  -  The child being added.
 *
 * Returns:  The parent after the child has been connected (if the
 *           parent was originally a leaf, it is first converted with
 *           int_stree_convert_leafnode and the converted node is
 *           returned).  NULL is returned if that conversion fails or
 *           if parent already has a child with the same mapped
 *           character as newchild.
 */
STREE_NODE int_stree_connect(SUFFIX_TREE tree, STREE_NODE parent,
                             STREE_NODE newchild)
{
  char key;
  STREE_NODE cur, prev;

  if (int_stree_isaleaf(tree, parent)) {
    parent = int_stree_convert_leafnode(tree, parent);
    if (parent == NULL)
      return NULL;
  }

  key = stree_get_mapch(tree, newchild);

  /* Skip past every child that sorts strictly before newchild. */
  prev = NULL;
  for (cur = stree_get_children(tree, parent);
       cur != NULL && stree_get_mapch(tree, cur) < key;
       cur = stree_get_next(tree, cur))
    prev = cur;

  if (cur == NULL) {
    /* newchild goes last: its next field refers back to the parent. */
    newchild->next = parent;
    newchild->nextisparent = 1;
  }
  else {
    if (stree_get_mapch(tree, cur) == key)
      return NULL;

    newchild->next = cur;
    newchild->nextisparent = 0;
  }

  if (prev != NULL) {
    prev->next = newchild;
    prev->nextisparent = 0;
  }
  else if (int_stree_has_intleaves(tree, parent))
    ((STREE_INTLEAF) (parent->children))->nextchild = newchild;
  else
    parent->children = newchild;

  tree->idents_dirty = 1;

  return parent;
}
/*
 * stree_find_child
 *
 * Look up the child of a node whose mapped character (stree_get_mapch)
 * equals the mapping of the character given as a parameter.  The scan
 * stops at the first child whose mapped character is not smaller than
 * the target.
 *
 * Parameters:  tree  -  a suffix tree
 *              node  -  a tree node
 *              ch    -  a character
 *
 * Returns:  the matching child, or NULL if ch is negative, is not below
 *           tree->alpha_size, or no child matches.
 */
STREE_NODE stree_find_child(SUFFIX_TREE tree, STREE_NODE node, char ch)
{
  char target;
  STREE_NODE cur;

  if (ch < 0 || ch >= tree->alpha_size)
    return NULL;

  target = stree_mapch(tree, ch);

  for (cur = stree_get_children(tree, node); cur != NULL;
       cur = stree_get_next(tree, cur)) {
    if (stree_get_mapch(tree, cur) >= target)
      break;
  }

  if (cur == NULL)
    return NULL;

  return (target == stree_get_mapch(tree, cur)) ? cur : NULL;
}
/*
 * compute_I_and_L
 *
 * Compute the I values and L values for the LCA preprocessing.
 * For each node, I[id] receives the identifier with the greatest h()
 * value found in the subtree rooted at the node (the node's own
 * identifier is kept when no descendant has a strictly greater one).
 * L[I] records the node for that I value; because a node is recorded
 * after its children, the entry ends up holding the highest node of
 * each "run" sharing an I value.
 *
 * Parameters:  lca   -  an LCA_STRUCT structure
 *              tree  -  a suffix tree
 *              node  -  a suffix tree node
 *
 * Returns:  the I value computed for node.
 */
static int compute_I_and_L(LCA_STRUCT *lca, SUFFIX_TREE tree, STREE_NODE node)
{
  // Identifiers are shifted by one so that they run from 1..num_nodes.
  unsigned int self_id = (unsigned int)stree_get_ident(tree, node) + 1;

  // Best candidate seen so far, starting with the node itself.
  unsigned int best = self_id;

  STREE_NODE kid = stree_get_children(tree, node);
  while (kid) {
    unsigned int sub_best = compute_I_and_L(lca, tree, kid);

    if (h(sub_best) > h(best))
      best = sub_best;

    IF_STATS(lca->num_prep++);
    kid = stree_get_next(tree, kid);
  }

  lca->I[self_id] = best;
  lca->L[best] = node;  // replaced later by an ancestor in the same run

  return best;
}
/*
 * int_stree_set_idents
 *
 * Assign identifiers to the current nodes of the suffix tree using a
 * non-recursive traversal.  Nodes are numbered from 0 in depth-first
 * order, starting at the root and taking children in the order they
 * appear in the children lists.  Nothing is done unless the tree's
 * idents_dirty flag is set; the flag is cleared before renumbering.
 *
 * Parameters:  tree  -  A suffix tree
 *
 * Return:  nothing.
 */
void int_stree_set_idents(SUFFIX_TREE tree)
{
  int next_id;
  STREE_NODE cur, below, sibling;

  if (!tree->idents_dirty)
    return;

  tree->idents_dirty = 0;

  next_id = 0;
  cur = stree_get_root(tree);
  for (;;) {
    /* Descend: number each node as it is reached, first child first. */
    for (;;) {
      cur->id = next_id++;

      below = stree_get_children(tree, cur);
      if (below == NULL)
        break;
      cur = below;
    }

    /*
     * Ascend until a node with a next sibling is found; stop once the
     * walk is back at the root.
     */
    for (;;) {
      if (cur == stree_get_root(tree))
        return;

      sibling = stree_get_next(tree, cur);
      if (sibling != NULL)
        break;

      cur = stree_get_parent(tree, cur);
    }

    cur = sibling;
  }
}