/**
 * Descend through the tree from a node, consuming a path edge by edge.
 * @param st the suffixtree being traversed
 * @param v the node whose children are searched first
 * @param p the path to follow; it is always disposed before returning
 * @return a newly created position for the last character of the path,
 * or NULL if the path length was not positive
 */
static pos *walk_down( suffixtree *st, node *v, path *p )
{
    pos *result = NULL;
    int offset = path_start( p );
    int remaining = path_len( p );
    node *child = node_find_child( v, st->str, st->str[offset] );

    /* skip every edge that the remaining path extends beyond */
    while ( remaining > 0 && remaining > node_len(child) )
    {
        offset += node_len( child );
        remaining -= node_len( child );
        child = node_find_child( child, st->str, st->str[offset] );
    }
    /* the rest of the path ends somewhere on the current edge */
    if ( remaining > 0 )
    {
        result = pos_create();
        result->loc = node_start(child) + remaining - 1;
        result->v = child;
    }
    path_dispose( p );
    return result;
}
/**
 * Look up the child of an internal node whose edge starts with a character
 * @param v the internal node whose children are scanned
 * @param c the first character wanted
 * @return the first matching child, or NULL if none starts with c
 */
static node *find_child( node *v, char c )
{
    node *child;
    for ( child = node_children(v); child != NULL; child = node_next(child) )
    {
        /* compare the first character of the child's edge */
        if ( str[node_start(child)] == c )
            break;
    }
    return child;
}
/**
 * Start a node and run the chord test on it, then enter the node's run loop.
 * With no arguments the test runs against a manager started on this node;
 * otherwise argv[0] is passed to read_managers to obtain the managers to use.
 * Exits the process with status 1 if the node cannot be started or the
 * managers cannot be read.
 * @param argc number of entries in argv (0 selects standalone mode)
 * @param argv arguments; only argv[0] is used, and only in client mode
 */
void run_chordtest(int argc, char **argv)
{
  node *self = node_start(LOG_ERROR,0);
  if (self == NULL)
    exit(1);

  if (argc != 0) {
    /* Client mode; run the chord test on a set of remote nodes specified in the file */
    endpointid *remote_managers;
    int remote_count;
    if (read_managers(self,argv[0],&remote_managers,&remote_count) != 0)
      exit(1);
    testchord(self,remote_managers,remote_count);
    free(remote_managers);
  }
  else {
    /* Standalone mode; run the test locally */
    endpointid local_manager;
    local_manager.ip = self->listenip;
    local_manager.port = self->listenport;
    local_manager.localid = MANAGER_ID;
    start_manager(self);
    testchord(self,&local_manager,1);
  }

  node_run(self);
}
/**
 * Locate the suffix j..i in the tree.
 * @param st the suffixtree in question
 * @param j the extension number counting from 0
 * @param i the current phase - 1
 * @param log the log to record errors in
 * @return the position (combined node and edge-offset); a copy of it is
 * also stored in st->last
 */
static pos *find_beta( suffixtree *st, int j, int i, plugin_log *log )
{
    pos *p;
    int cached = ( st->old_j > 0 && st->old_j == j );

    if ( cached || j > i || j == 0 )
    {
        /* the three cases that need no tree walk */
        p = pos_create();
        if ( cached )
        {
            // reuse the position recorded for this extension
            p->loc = st->old_beta.loc;
            p->v = st->old_beta.v;
        }
        else if ( j > i )
        {
            // empty string
            p->loc = 0;
            p->v = st->root;
        }
        else
        {
            // entire string
            p->loc = i;
            p->v = st->f;
        }
    }
    else
    {
        // walk across tree
        node *anc = st->last.v;
        int tail_len = st->last.loc - node_start(anc) + 1;
        path *tail = path_create( node_start(anc), tail_len, log );

        /* climb until the root or a node with a link, growing the path */
        for ( anc = node_parent(anc);
            anc != st->root && node_link(anc) == NULL;
            anc = node_parent(anc) )
        {
            path_prepend( tail, node_len(anc) );
        }
        if ( anc == st->root )
        {
            /* no link found: drop the path and walk j..i from the root */
            path_dispose( tail );
            p = walk_down( st, st->root, path_create(j, i-j+1, log) );
        }
        else
        {
            /* follow the link and walk the collected path down from there */
            p = walk_down( st, node_link(anc), tail );
        }
    }
    st->last = *p;
    return p;
}
/** * Find a location of the suffix in the tree. * @param j the extension number counting from 0 * @param i the current phase - 1 * @return the position (combined node and edge-offset) */ static pos *find_beta( int j, int i ) { pos *p; if ( old_j > 0 && old_j == j ) { p = pos_create(); p->loc = old_beta.loc; p->v = old_beta.v; } else if ( j>i ) // empty string { p = pos_create(); p->loc = 0; p->v = root; } else if ( j==0 ) // entire string { p = pos_create(); p->loc = i; p->v = f; } else // walk across tree { node *v = last.v; int len = last.loc-node_start(last.v)+1; path *q = path_create( node_start(v), len ); v = node_parent( v ); while ( v != root && node_link(v)==NULL ) { path_prepend( q, node_len(v) ); v = node_parent( v ); } if ( v != root ) { v = node_link( v ); p = walk_down( v, q ); } else { path_dispose( q ); p = walk_down( root, path_create(j,i-j+1) ); } } last = *p; return p; }
/**
 * Extend the deepest match in a chain one character at a time, for as
 * long as the suffixtree keeps accepting the next character of the pairs.
 * @param m the match all ready to go (the last match in its chain is used)
 * @param text the text of the new version
 * @param v the new version id
 * @param log the log to save errors in
 * @param popped if 0 the tree position is reset to the root and the
 * maximal flag cleared; otherwise the stored position is kept
 * @return the maximal flag of the extended match (1 once is_maximal
 * has accepted it, else whatever it held on entry or 0 after a reset)
 */
int match_single( match *m, UChar *text, int v, plugin_log *log, int popped )
{
    pos *where;

    // go to the deepest match if not the first one (as usual)
    for ( ; m->next != NULL; m = m->next )
        ;
    where = &m->loc;
    // preserve popped location in suffix tree
    if ( !popped )
    {
        where->v = suffixtree_root( m->st );
        where->loc = node_start(where->v) - 1;
        m->maximal = 0;
    }
    for ( ;; )
    {
        UChar *data = pair_data( card_pair(m->end.current) );
        UChar c = data[m->end.pos];

        /* stop as soon as the tree has no continuation for c */
        if ( !suffixtree_advance_pos(m->st, where, c) )
            break;
        /* take the version set from the current pair on first use */
        if ( m->bs == NULL )
            m->bs = bitset_clone( pair_versions(card_pair(m->end.current)) );
        /* maximality is only tested at a leaf, and only until it succeeds */
        if ( !m->maximal && node_is_leaf(where->v) )
        {
            m->text_off = m->st_off + node_start(where->v) - m->len;
            if ( !is_maximal(m, text) )
                break;
            m->maximal = 1;
        }
        // we are already matched, so increase length
        m->len++;
        if ( !match_advance(m, where, v, log) )
            break;
    }
    return m->maximal;
}
/**
 * Recursively give every leaf below (and including) a node a length
 * that makes it extend to e.
 * @param v the node to process together with all its descendants
 */
static void set_e( node *v )
{
    node *child;

    /* a leaf's edge runs from its start index through e */
    if ( node_is_leaf(v) )
        node_set_len( v, e - node_start(v) + 1 );

    for ( child = node_children(v); child != NULL; child = node_next(child) )
        set_e( child );
}
/**
 * Remember where the latest suffix was inserted, one character past p.
 * @param st the suffixtree in question
 * @param p the position of j..i-1.
 * @param i the desired index of the extra char
 */
static void update_old_beta( suffixtree *st, pos *p, int i )
{
    if ( node_end(p->v, st->e) <= p->loc )
    {
        /* p sits at the end of its edge: move onto the child for str[i] */
        node *next = node_find_child( p->v, st->str, st->str[i] );
        st->old_beta.v = next;
        st->old_beta.loc = node_start( next );
        return;
    }
    /* still room on the current edge: just step one character along it */
    st->old_beta.v = p->v;
    st->old_beta.loc = p->loc + 1;
}
/**
 * Remember where the latest suffix was inserted, one character past p
 * (stored in the file-level old_beta).
 * @param p the position of j..i-1.
 * @param i the desired index of the extra char
 */
static void update_old_beta( pos *p, int i )
{
    node *target;
    int target_loc;

    if ( node_end(p->v, e) > p->loc )
    {
        /* still room on the current edge: step one character along it */
        target = p->v;
        target_loc = p->loc + 1;
    }
    else
    {
        /* p sits at the end of its edge: move onto the child for str[i] */
        target = find_child( p->v, str[i] );
        target_loc = node_start( target );
    }
    old_beta.v = target;
    old_beta.loc = target_loc;
}
/**
 * Recursively give every leaf below (and including) a node a length
 * that makes it extend to st->e.
 * @param st the suffixtree in question
 * @param v the node to process together with all its descendants
 * @param log the log to record errors in
 */
static void set_e( suffixtree *st, node *v, plugin_log *log )
{
    node_iterator *kids;

    /* a leaf's edge runs from its start index through st->e */
    if ( node_is_leaf(v) )
        node_set_len( v, st->e - node_start(v) + 1 );

    kids = node_children( v, log );
    if ( kids == NULL )
        return;     /* no iterator was returned, so nothing to recurse into */

    while ( node_iterator_has_next(kids) )
        set_e( st, node_iterator_next(kids), log );
    node_iterator_dispose( kids );
}
/**
 * Build a tree for a string and accumulate a total while following the
 * string itself down from the root.
 * NOTE(review): this looks like the sum of the common-prefix lengths
 * between line and each of its suffixes - confirm against the caller.
 * The total is accumulated in a long: the function returns long and the
 * per-level term (p - line) * multiple is a wide product, so summing it
 * in an int would truncate or overflow for long inputs.
 * @param line the NUL-terminated string to analyse
 * @return the accumulated total, or 0 if no tree could be built
 */
long calc_similars(const char* line)
{
    long total = 0;
    int multiple = 0, pplus = 0;

    root = build_tree( line );
    if ( root != NULL )
    {
        node *u = node_children(root);
        const char *p = line;
        while ( *p )
        {
            node *next_u = NULL;
            /* scan all siblings at this level */
            while ( u != NULL )
            {
                int nstart = node_start(u);
                if ( line[nstart] == *p )
                {
                    /* the edge continuing the string: remember how far it
                       reaches (not counting a terminating NUL) and where
                       to look next */
                    int end = node_end(u,e);
                    pplus = end - nstart + (line[end] == 0 ? 0 : 1);
                    next_u = node_children(u);
                }
                else if ( node_is_leaf(u) )
                {
                    multiple++;
                }
                else
                {
                    multiple += node_num_children(node_children(u));
                }
                u = node_next(u);
            }
            /* each branch counted here contributes the depth reached so far */
            total += (long)(p - line) * multiple;
            p += pplus;
            u = next_u;
            multiple = 0;
        }
        /* finally add the distance p has advanced through line */
        total += (long)(p - line);
        node_dispose( root );
    }
    return total;
}
/**
 * Try to extend a search position by one character.
 * @param st the suffixtree to search
 * @param p the position in the tree of the last match; it is moved
 * forward only if c is found
 * @param c the character to find next
 * @return 1 if the next char was found else 0
 */
int suffixtree_advance_pos( suffixtree *st, pos *p, UChar c )
{
    if ( node_end(p->v, st->e) > p->loc )
    {
        /* still inside an edge: the next character on it must equal c */
        if ( st->str[p->loc+1] != c )
            return 0;
        p->loc++;
        return 1;
    }

    /* at the end of an edge: look for a child that starts with c */
    node *n = node_find_child( p->v, st->str, c );
    if ( n == NULL )
        return 0;
    p->loc = node_start( n );
    p->v = n;
    return 1;
}