/**
 * Trim a loose range so that what remains can be matched against a
 * tree-node. Any piece of r lying before n's start or after n's end is
 * split off, handed to dom_store_range and disposed; the remaining piece
 * is passed to dom_node_equals together with n.
 * @param d the dom in question
 * @param n the node in the tree
 * @param r the out-of-tree range (held as a node) to split up
 */
static void dom_breakup_range( dom *d, node *n, node *r )
{
    node *middle = r;
    if ( node_offset(n) > node_offset(r) )
    {
        /* r starts before n: cut at n's start, keep the right-hand piece */
        node_split( r, node_offset(n) );
        middle = node_next_sibling( r );
        node_detach_sibling( r, NULL );
        dom_store_range( d, node_to_range(r) );
        node_dispose( r );
    }
    if ( node_end(middle) > node_end(n) )
    {
        /* middle runs past n: cut at n's end, store the trailing piece */
        node *tail;
        node_split( middle, node_end(n) );
        tail = node_next_sibling( middle );
        node_detach_sibling( tail, middle );
        dom_store_range( d, node_to_range(tail) );
        node_dispose( tail );
    }
    dom_node_equals( d, n, middle );
}
/**
 * Try to make the loose node r into a parent of the tree-node n. Starting
 * at n, each sibling is examined in turn until the siblings run out or one
 * is reached for which node_follows(r,n) holds; siblings that fit are moved
 * underneath r. Finally r is added as a child of n's original parent.
 * @param d the dom in question
 * @param n the node above which to add the parent
 * @param r the new unattached node
 */
static void dom_make_parent( dom *d, node *n, node *r )
{
    node *parent = node_parent(n);
    node *before = node_prec_sibling( n );
    if ( parent==NULL )
        printf("parent is NULL\n");
    while ( n != NULL && !node_follows(r,n) )
    {
        node *following = node_next_sibling(n);
        if ( !dom_nests(d,node_name(n),node_name(r)) )
        {
            /* dom_nests rejected the pair: cut r at n's start, store the
               piece after the cut and stop scanning */
            node *rest;
            node_split( r, node_offset(n) );
            rest = node_next_sibling( r );
            node_detach_sibling( r, NULL );
            dom_store_range( d, node_to_range(rest) );
            node_dispose( rest );
            break;
        }
        if ( range_encloses_node(n,r) || range_equals_node(n,r) )
        {
            /* n fits: move it from its sibling list to under r */
            node_detach_sibling( n, before );
            node_add_child( r, n );
            if ( node_overlaps_on_right(parent,r) )
            {
                /* cut r at the parent's end and store the overhang */
                node *rest;
                node_split( r, node_end(parent) );
                rest = node_next_sibling( r );
                node_detach_sibling( r, NULL );
                dom_store_range( d, node_to_range(rest) );
                node_dispose( rest );
            }
        }
        else
        {
            if ( node_overlaps_on_left(n,r) )
            {
                /* only the first part of n fits: split n at r's end and
                   move that first part under r */
                node_split( n, node_end(r) );
                node_detach_sibling( n, before );
                node_add_child( r, n );
            }
            break;
        }
        n = following;
        if ( n != NULL )
            before = node_prec_sibling( n );
    }
    /* make n's original parent the parent of r */
    node_add_child( parent, r );
}
/**
 * Does the node strictly contain the range (now a node)? Identical spans
 * do not count as enclosing.
 * @param n the node already in the tree
 * @param r a loose node looking for a home
 * @return 1 if n covers r and the two spans are not identical, else 0
 */
static int node_encloses_range( node *n, node *r )
{
    int r_end = node_end(r);
    int n_end = node_end(n);
    int n_start = node_offset(n);
    int r_start = node_offset(r);
    if ( n_start == r_start && n_end == r_end )
        return 0;
    return n_start <= r_start && r_end <= n_end;
}
/**
 * Split a tree-node so that one piece of it lines up with a range. n is
 * cut at r's start (when r starts later than n) and the resulting piece is
 * cut at r's end (when r ends earlier); that piece and r are then passed to
 * dom_node_equals.
 * @param d the dom in question
 * @param n the node in the tree to split up
 * @param r the out-of-tree node r that overlaps with n
 */
static void dom_breakup_node( dom *d, node *n, node *r )
{
    node *target = n;
    if ( node_offset(r) > node_offset(n) )
    {
        /* drop the leading part of n that r does not cover */
        node_split( n, node_offset(r) );
        target = node_next_sibling( n );
    }
    if ( node_end(r) < node_end(target) )
    {
        /* drop the trailing part that r does not cover */
        node_split( target, node_end(r) );
    }
    dom_node_equals( d, target, r );
}
// Walks down from the root comparing r_key with each node's key and
// accumulates an order: whenever the walk does not go left, the metadata
// of the left child (if any) is added, plus one more when the node's key
// compares less than r_key. The walk stops at a node whose key is neither
// less nor greater than r_key, or when it falls off the tree.
PB_DS_CLASS_T_DEC
inline typename PB_DS_CLASS_C_DEC::size_type
PB_DS_CLASS_C_DEC::
order_of_key(key_const_reference r_key) const
{
  node_const_iterator it = node_begin();
  node_const_iterator end_it = node_end();

  const cmp_fn& r_cmp_fn = const_cast<PB_DS_CLASS_C_DEC*>(this)->get_cmp_fn();
  size_type ord = 0;
  while (it != end_it)
    {
      node_const_iterator l_it = it.get_l_child();
      if (r_cmp_fn(r_key, this->extract_key(*(*it))))
        {
          // r_key sorts before this node: descend left, nothing to count.
          it = l_it;
          continue;
        }

      // Not going left, so the left child's metadata is added.
      if (l_it != end_it)
        ord += l_it.get_metadata();

      if (!r_cmp_fn(this->extract_key(*(*it)), r_key))
        break; // neither less nor greater: finished

      // This node's key is less than r_key: count it and go right.
      ++ord;
      it = it.get_r_child();
    }
  return ord;
}
// Returns the iterator pair delimiting the entries below the first node,
// found by descending with next_child, whose common prefix with [b, e)
// is at least as long as [b, e) itself. Returns (end(), end()) when the
// descent runs off the tree first.
PB_DS_CLASS_T_DEC
std::pair<
  typename PB_DS_CLASS_C_DEC::iterator,
  typename PB_DS_CLASS_C_DEC::iterator>
PB_DS_CLASS_C_DEC::
prefix_range(typename access_traits::const_iterator b,
             typename access_traits::const_iterator e)
{
  Node_Itr nd_it = node_begin();
  Node_Itr end_nd_it = node_end();

  const access_traits& r_traits = get_access_traits();
  const size_type given_range_length = std::distance(b, e);

  for (; nd_it != end_nd_it;
       nd_it = next_child(nd_it, b, e, end_nd_it, r_traits))
    {
      const size_type common_range_length =
        base_type::common_prefix_len(nd_it, b, e, r_traits);

      if (common_range_length < given_range_length)
        continue; // prefix not fully matched yet: keep descending

      iterator ret_b = this->leftmost_it(nd_it);
      iterator ret_e = this->rightmost_it(nd_it);
      return (std::make_pair(ret_b, ++ret_e));
    }

  return (std::make_pair(end(), end()));
}
/**
 * Does the position continue with the given character? When p->loc lies
 * before node_end(p->v,e) the character at str[p->loc+1] is compared with
 * c; otherwise a child of p->v is looked up with find_child.
 * @param p a position in the tree.
 * @param c the character to test for in the next position
 * @return 1 if it does else 0
 */
static int continues( pos *p, char c )
{
    if ( node_end(p->v,e) <= p->loc )
        return find_child(p->v,c) != NULL;
    return str[p->loc+1] == c;
}
// Removes the node that follows update[0] on level 0.
// For every level below level_max_current_m whose update entry points at
// that node, the entry is re-linked to the node's successor on that level.
// The level-0 successor's prev pointer is patched, the node is deleted,
// unused top levels are trimmed and size_m is decremented.
// Returns the level-0 successor of the removed node.
inline typename skip_list<T>::type_node*
skip_list<T>::erase_internal ( typename skip_list<T>::type_node** update )
{
    // Precondition: there is a real node to erase after update[0].
    assert(update[0]->next[0] != node_end());

    type_node* victim = update[0]->next[0];
    type_node* successor = victim->next[0];

    for (u8 lvl = 0; lvl < level_max_current_m; ++lvl) {
        if (update[lvl]->next[lvl] != victim) {
            continue;
        }
        update[lvl]->next[lvl] = victim->next[lvl];
    }

    successor->prev = victim->prev;
    delete_node(victim);

    // Lower the current level while the header's top link is empty.
    while (level_max_current_m > 1
           && header_node_m->next[level_max_current_m - 1] == node_end()) {
        --level_max_current_m;
    }

    --size_m;
    return successor;
}
// Descends from the root looking for the entry at position `order`.
// At each node the left child's metadata (0 when there is no left child)
// is compared with the remaining order: smaller order goes left, equal
// order returns the node, larger order subtracts that value plus one and
// goes right. Returns base_type::end_iterator() if the walk leaves the tree.
PB_DS_CLASS_T_DEC
inline typename PB_DS_CLASS_C_DEC::iterator
PB_DS_CLASS_C_DEC::
find_by_order(size_type order)
{
  node_iterator it = node_begin();
  node_iterator end_it = node_end();

  while (it != end_it)
    {
      node_iterator l_it = it.get_l_child();

      size_type o = 0;
      if (l_it != end_it)
        o = l_it.get_metadata();

      if (order < o)
        {
          it = l_it;
          continue;
        }
      if (order == o)
        return *it;

      order -= o + 1;
      it = it.get_r_child();
    }

  return base_type::end_iterator();
}
/**
 * Does the position continue with the given character? When p->loc lies
 * before node_end(p->v,st->e) the character at st->str[p->loc+1] is
 * compared with c; otherwise a child of p->v is looked up with
 * node_find_child.
 * @param st the suffixtree object
 * @param p a position in the tree.
 * @param c the character to test for in the next position
 * @return 1 if it does else 0
 */
static int continues( suffixtree *st, pos *p, UChar c )
{
    if ( node_end(p->v,st->e) <= p->loc )
        return node_find_child(p->v,st->str,c) != NULL;
    return st->str[p->loc+1] == c;
}
void print_levels() { for (type_node* it = node_begin(); it != node_end(); it = it->next[0]) { printf("%d, ", it->next_size); } printf("\n"); }
// Accumulates an order for the range [b, e) by descending from the root.
// At each internal node the children are scanned from last to first: the
// first child (in that scan) whose valid prefix is not greater than
// [b, e) is chosen as the next node, and the metadata of every child
// scanned after it is added to the order. At a leaf, one more is added
// when the leaf's key is less than [b, e). Returns 0 for an empty
// container.
PB_DS_CLASS_T_DEC
inline typename PB_DS_CLASS_C_DEC::size_type
PB_DS_CLASS_C_DEC::
order_of_prefix(typename access_traits::const_iterator b,
                typename access_traits::const_iterator e) const
{
  if (empty())
    return 0;

  const _ATraits& r_traits =
    const_cast<PB_DS_CLASS_C_DEC*>(this)->get_access_traits();

  node_const_iterator nd_it = node_begin();
  node_const_iterator end_nd_it = node_end();
  size_type ord = 0;

  for (;;)
    {
      const size_type num_children = nd_it.num_children();
      if (num_children == 0)
        {
          // Leaf: decide whether its own key counts.
          key_const_reference r_key = base_type::extract_key(*(*nd_it));
          typename access_traits::const_iterator key_b =
            r_traits.begin(r_key);
          typename access_traits::const_iterator key_e =
            r_traits.end(r_key);

          if (base_type::less(key_b, key_e, b, e, r_traits))
            return ord + 1;
          return ord;
        }

      node_const_iterator next_nd_it = end_nd_it;

      // num_children is non-zero here, so this visits num_children - 1
      // down to 0 inclusive.
      for (size_type i = num_children; i-- > 0; )
        {
          node_const_iterator child_nd_it = nd_it.get_child(i);
          if (next_nd_it != end_nd_it)
            {
              // Next node already chosen: add this child's metadata.
              ord += child_nd_it.get_metadata();
              continue;
            }
          if (!base_type::less(b, e,
                               child_nd_it.valid_prefix().first,
                               child_nd_it.valid_prefix().second,
                               r_traits))
            next_nd_it = child_nd_it;
        }

      if (next_nd_it == end_nd_it)
        return ord;

      nd_it = next_nd_it;
    }
}
/**
 * Record in st->old_beta the position where the latest suffix was
 * inserted: one step further along the current edge when p is not yet at
 * its end, otherwise the start of the child of p->v found for st->str[i].
 * @param st the suffixtree in question
 * @param p the position of j..i-1.
 * @param i the desired index of the extra char
 */
static void update_old_beta( suffixtree *st, pos *p, int i )
{
    if ( node_end(p->v,st->e) <= p->loc )
    {
        /* at the end of the edge: move into the matching child */
        node *child = node_find_child( p->v, st->str, st->str[i] );
        st->old_beta.v = child;
        st->old_beta.loc = node_start( child );
        return;
    }
    /* still inside the edge: advance by one on the same node */
    st->old_beta.v = p->v;
    st->old_beta.loc = p->loc+1;
}
/**
 * Write to the console details of the dropped node, then dispose of it.
 * If the node carries an "id" attribute whose value ends in 'b', an extra
 * diagnostic line is printed to stdout.
 * @param d the dom in question (not used here)
 * @param r the node we are dropping; it is disposed before returning
 * @param n the parent node
 */
static void dom_drop_notify( dom *d, node *r, node *n )
{
    attribute *id;
    warning("dom: dropping %s at %d:%d - %s and %s incompatible\n",
        node_name(r),node_offset(r),
        node_end(r),node_html_name(r),node_html_name(n));
    id = node_get_attribute( r, "id" );
    if ( id != NULL )
    {
        char *value = attribute_get_value( id );
        /* An empty value would make strlen(value)-1 wrap around and index
           far outside the string, so only look at the last character when
           there is one. A NULL value is treated as empty. */
        size_t len = (value==NULL) ? 0 : strlen( value );
        if ( len > 0 && value[len-1]=='b' )
            printf( "aha! dropping id %s\n",value );
    }
    node_dispose( r );
}
/**
 * Record in old_beta the position where the latest suffix was inserted:
 * one step further along the current edge when p is not yet at its end,
 * otherwise the start of the child of p->v found for str[i].
 * @param p the position of j..i-1.
 * @param i the desired index of the extra char
 */
static void update_old_beta( pos *p, int i )
{
    if ( node_end(p->v,e) <= p->loc )
    {
        /* at the end of the edge: move into the matching child */
        node *child = find_child( p->v, str[i] );
        old_beta.v = child;
        old_beta.loc = node_start( child );
        return;
    }
    /* still inside the edge: advance by one on the same node */
    old_beta.v = p->v;
    old_beta.loc = p->loc+1;
}
/**
 * Print a single node and, recursively, its children. A non-empty,
 * non-root node is wrapped in an opening and a closing tag; text between
 * and around the children is emitted with dom_print_text. An empty
 * non-root node produces only a bare tag, and only when node_rightmost
 * holds for it.
 * @param d the dom in question
 * @param n the node to print
 */
static void dom_print_node( dom *d, node *n )
{
    node *child;
    int cursor,limit;
    char *html_name = node_html_name(n);
    char *class_name = node_name(n);
    char attrs[128];
    node_get_attributes( n, attrs, 128 );
    if ( !node_empty(n) && !node_is_root(n) )
    {
        /* 11 = the fixed characters of the format: < class=""> */
        dom_concat( d, "<%s%s class=\"%s\">", strlen(html_name)
            +strlen(class_name)+strlen(attrs)+11, html_name, attrs,
            class_name );
    }
    child = node_first_child(n);
    cursor = node_offset(n);
    limit = node_end(n);
    for ( ; child != NULL; child = node_next_sibling(child) )
    {
        int child_start = node_offset( child );
        /* text lying between the previous output and this child */
        if ( child_start > cursor )
            dom_print_text( d, cursor, child_start-cursor );
        dom_print_node( d, child );
        cursor = node_end( child );
    }
    /* text after the last child (or all of it if there were none) */
    if ( limit > cursor )
        dom_print_text( d, cursor, limit-cursor );
    if ( node_is_root(n) )
        return;
    if ( !node_empty(n) )
        dom_concat(d, "</%s>",strlen(html_name)+3, html_name);
    else if ( node_rightmost(n) )
        dom_concat(d,"<%s>",strlen(html_name)+2,html_name);
}
/**
 * Build the dom by draining the queue d->q: each popped range is turned
 * into a node and added under the root. Building stops at the first node
 * whose end lies beyond the text length.
 * @param d the dom object to build
 * @return 1 if the whole queue was processed, 0 if a node exceeded the
 * text length
 */
int dom_build( dom *d )
{
    while ( !queue_empty(d->q) )
    {
        range *rx = queue_pop( d->q );
        node *r = dom_range_to_node( d, rx );
        if ( r == NULL )
            continue;
        if ( node_end(r) > d->text_len )
        {
            fprintf(stderr,"node range %d:%d > text length (%d)\n",
                node_offset(r),node_end(r), d->text_len );
            node_dispose( r );
            return 0;
        }
        dom_add_node( d, d->root, r );
    }
    return 1;
}
/**
 * Handle overlap on the right of a tree-node. Depending on which of the
 * two dom_mostly_nests tests succeeds, either n is split at r's start and
 * r is added to the second piece, or r is split at n's end, the piece
 * after the cut is stored and r is added to n. If neither test succeeds
 * r is dropped.
 * @param d the dom in question
 * @param n the node to test against
 * @param r the rogue who overlaps on the right
 */
static void dom_range_overlaps_right( dom *d, node *n, node *r )
{
    node *rest;
    if ( dom_mostly_nests(d,node_name(n),node_name(r)) )
    {
        /* split the tree-node and hand r to its second piece */
        node_split( n, node_offset(r) );
        dom_add_node( d, node_next_sibling(n), r );
        return;
    }
    if ( !dom_mostly_nests(d,node_name(r),node_name(n)) )
    {
        dom_drop_notify( d, r, n );
        return;
    }
    /* split the range instead and store what lies beyond n's end */
    node_split( r, node_end(n) );
    rest = node_next_sibling(r);
    node_detach_sibling( r, NULL );
    dom_store_range( d, node_to_range(rest) );
    node_dispose( rest );
    dom_add_node( d, n, r );
}
// Descends from the root looking for the entry at position `order`.
// order is first made 1-based. At each node: if order exceeds the node's
// metadata, the iterator one past the node's rightmost entry is returned;
// a node without children is returned directly; otherwise the children
// are scanned in index order, subtracting each child's metadata from
// order until a child whose metadata is at least order is found, and the
// descent continues there. Returns end() for an empty container.
PB_DS_CLASS_T_DEC
inline typename PB_DS_CLASS_C_DEC::iterator
PB_DS_CLASS_C_DEC::
find_by_order(size_type order)
{
  if (empty())
    return (end());

  ++order;
  node_iterator nd_it = node_begin();
  node_iterator end_nd_it = node_end(); // kept from the original; not read below

  for (;;)
    {
      if (order > nd_it.get_metadata())
        return (++base_type::rightmost_it(nd_it));

      const size_type num_children = nd_it.num_children();
      if (num_children == 0)
        return (*nd_it);

      for (size_type i = 0; i < num_children; ++i)
        {
          node_iterator child_nd_it = nd_it.get_child(i);
          if (order > child_nd_it.get_metadata())
            {
              // Target lies past this child: skip its entries.
              order -= child_nd_it.get_metadata();
              continue;
            }
          nd_it = child_nd_it;
          break;
        }
    }
}
/**
 * Check a single tree-node, recursively. Each child must lie within the
 * parent's span and must not overlap its previous or next sibling; a
 * violation is reported with warning() and fails the check at once.
 * Children that pass are themselves checked recursively.
 * @param n the node whose children are to be checked
 * @return 1 if n's children and all their descendants are consistent,
 * else 0
 */
static int dom_check_node( node *n )
{
    int res = 1;
    int start = node_offset(n);
    int end = node_end(n);
    node *c = node_first_child(n);
    node *prev = NULL;
    while ( c != NULL )
    {
        node *next = node_next_sibling( c );
        if ( node_offset(c)<start )
        {
            warning("dom: invalid offset %d < parent start %d\n",node_offset(c),
                start);
            return 0;
        }
        else if ( node_end(c)>end )
        {
            warning("dom: invalid end %d (%s) > parent end %d (%s)\n",
                node_end(c), node_name(c), end, node_name(n) );
            return 0;
        }
        else if ( prev != NULL && node_end(prev)>node_offset(c) )
        {
            warning("dom: prev node ending %d encroaches on child node at %d\n",
                node_end(prev), node_offset(c));
            return 0;
        }
        else if ( next != NULL && node_end(c)>node_offset(next) )
        {
            warning("dom: next node starting %d encroaches on child node ending at %d\n",
                node_offset(next), node_end(c));
            return 0;
        }
        /* A failure in any subtree must stick: assigning the result of
           each recursive call directly would let a later, valid sibling
           overwrite an earlier failure. */
        else if ( !dom_check_node(c) )
            res = 0;
        prev = c;
        c = next;
    }
    return res;
}
/**
 * Build a tree over line with build_tree, walk it along the characters of
 * line and accumulate a total from the branches that leave the walked
 * path. The tree is stored in the file-scope variable root and disposed
 * before returning; node ends are resolved against the file-scope e.
 * The total is accumulated in a long, matching the return type, so that
 * sums too large for an int are not truncated.
 * @param line the NUL-terminated text to process
 * @return the accumulated total, or 0 if no tree could be built
 */
long calc_similars(const char* line)
{
    long total = 0;
    int multiple = 0, pplus = 0;
    root = build_tree( line );
    if ( root != NULL )
    {
        node *u = node_children(root);
        const char *p = line;
        node *next_u = NULL;
        while(*p)
        {
            next_u = NULL;
            /* scan the sibling list: one sibling continues the walk, the
               others contribute to multiple */
            while(u != NULL)
            {
                int nstart = node_start(u);
                if (line[nstart] == *p)
                {
                    int end = node_end(u,e);
                    /* length of this edge; one extra step unless the edge
                       ends at the terminating NUL */
                    pplus = end - nstart + (line[end] == 0 ? 0 : 1);
                    next_u = node_children(u);
                }
                else if (node_is_leaf(u))
                {
                    multiple++;
                }
                else
                {
                    multiple += node_num_children(node_children(u));
                }
                u = node_next(u);
            }
            total += (p - line) * multiple;
            p += pplus;
            u = next_u;
            multiple = 0;
        }
        total += (p - line);
        node_dispose( root );
    }
    return total;
}
/**
 * Advance a search by one character. Inside an edge the next character of
 * the text is compared with c; at the end of an edge a child of p->v is
 * looked up for c. p is only modified when c is found.
 * @param st the suffixtree to search
 * @param p the position in the tree of the last match, update if c found
 * @param c the character to find next
 * @return 1 if the next char was found else 0
 */
int suffixtree_advance_pos( suffixtree *st, pos *p, UChar c )
{
    if ( node_end(p->v,st->e) > p->loc )
    {
        /* still inside the current edge */
        if ( st->str[p->loc+1] != c )
            return 0;
        p->loc++;
        return 1;
    }
    else
    {
        /* at the end of the edge: step into the matching child, if any */
        node *child = node_find_child(p->v,st->str,c);
        if ( child == NULL )
            return 0;
        p->loc = node_start(child);
        p->v = child;
        return 1;
    }
}
/**
 * Are we at the end of this edge? The position's loc is compared with
 * node_end of its node, resolved against the file-scope e.
 * @param p the position to test
 * @return 1 if it is, else 0
 */
static int pos_at_edge_end( pos *p )
{
    if ( node_end(p->v,e) == p->loc )
        return 1;
    return 0;
}
/**
 * Are we at the end of this edge? The position's loc is compared with
 * node_end of its node, resolved against st->e.
 * @param st the suffixtree the position belongs to
 * @param p the position to test
 * @return 1 if it is, else 0
 */
int pos_at_edge_end( suffixtree *st, pos *p )
{
    if ( node_end(p->v,st->e) == p->loc )
        return 1;
    return 0;
}