/** * Build a tree using a given string * @param txt the text to build it from * @return the finished tree's root */ node *build_tree( char *txt ) { // init globals e = 0; root=NULL; f=NULL; current=NULL; memset( &last, 0, sizeof(pos) ); memset( &old_beta, 0, sizeof(pos) ); old_j = 0; str = txt; slen = strlen(txt); // actually build the tree root = node_create( 0, 0 ); if ( root != NULL ) { f = node_create_leaf( 0 ); if ( f != NULL ) { int i; node_add_child( root, f ); for ( i=1; i<=slen; i++ ) phase(i); set_e( root ); } } return root; }
/**
 * Create a new leaf and hang it off an existing node
 * @param parent the node that receives the new leaf as a child
 * @param i start-index in str of the leaf
 * @return 1 if the leaf was created and attached, 0 if it could not
 * be created (in which case parent is left untouched)
 */
int node_add_leaf( node *parent, int i )
{
    node *child = node_create_leaf( i );

    // nothing to attach if the leaf could not be created
    if ( child == NULL )
        return 0;
    node_add_child( parent, child );
    return 1;
}
/**
 * Extend the implicit suffix tree by adding one suffix of the current prefix
 * @param st the current suffixtree
 * @param j the offset into str of the suffix's start
 * @param i the offset into str at the end of the current prefix
 * @param log the log to record errors in
 * @return 1 if the phase continues (rules 1 and 2) else 0 (rule 3)
 */
static int extension( suffixtree *st, int j, int i, plugin_log *log )
{
    int res = 1;
    // the position returned by find_beta is owned here and freed below
    // NOTE(review): p is dereferenced without a NULL check - confirm
    // that find_beta cannot fail
    pos *p = find_beta( st, j, i-1, log );
    // rule 1 (once a leaf always a leaf): nothing to do
    if ( node_is_leaf(p->v) && pos_at_edge_end(st,p) )
        res = 1;
    // rule 2: str[i] does not continue from p, so a new leaf is added
    else if ( !continues(st,p,st->str[i]) )
    {
        // NOTE(review): leaf is passed on unchecked - confirm that
        // node_add_child tolerates a failed node_create_leaf
        node *leaf = node_create_leaf( i, log );
        if ( p->v==st->root || pos_at_edge_end(st,p) )
        {
            // hang the leaf directly off the existing node
            node_add_child( p->v, leaf, st->str, log );
            update_current_link( st, p->v );
        }
        else
        {
            // p lies inside an edge: split it and hang the leaf off
            // the node returned by the split
            // NOTE(review): u is used unchecked - confirm node_split
            // cannot return NULL
            node *u = node_split( p->v, p->loc, st->str, log );
            update_current_link( st, u );
            if ( i-j==1 )
            {
                node_set_link( u, st->root );
#ifdef DEBUG
                // state lives in st here, so name st->current rather
                // than a bare 'current'
                verify_link( st->current );
#endif
            }
            else
                st->current = u;
            node_add_child( u, leaf, st->str, log );
        }
        update_old_beta( st, p, i );
    }
    // rule 3: str[i] already continues from p, so the phase ends here
    else
    {
        update_current_link( st, p->v );
        update_old_beta( st, p, i );
        res = 0;
    }
    free( p );
    return res;
}
/** * Extend the implicit suffix tree by adding one suffix of the current prefix * @param j the offset into str of the suffix's start * @param i the offset into str at the end of the current prefix * @return 1 if the phase continues else 0 */ static int extension( int j, int i ) { int res = 1; pos *p = find_beta( j, i-1 ); // rule 1 (once a leaf always a leaf) if ( node_is_leaf(p->v) && pos_at_edge_end(p) ) res = 1; // rule 2 else if ( !continues(p,str[i]) ) { //printf("applying rule 2 at j=%d for phase %d\n",j,i); node *leaf = node_create_leaf( i ); if ( p->v==root || pos_at_edge_end(p) ) { node_add_child( p->v, leaf ); update_current_link( p->v ); } else { node *u = node_split( p->v, p->loc ); update_current_link( u ); if ( i-j==1 ) { node_set_link( u, root ); #ifdef DEBUG verify_link( current ); #endif } else current = u; node_add_child( u, leaf ); } update_old_beta( p, i ); } // rule 3 else { //printf("applying rule 3 at j=%d for phase %d\n",j,i); update_current_link( p->v ); update_old_beta( p, i ); res = 0; } free( p ); return res; }
/**
 * Build a tree using a given string
 * @param txt the text to build it from; txt[tlen] must be 0
 * @param tlen its length (index of the terminating 0 in txt)
 * @param log the log to record errors in
 * @return the finished tree, or NULL if txt is NULL, txt is not
 * null-terminated at tlen, or the tree struct could not be allocated.
 * If the root or the initial leaf cannot be created the partially
 * initialised tree is still returned and no phases are run.
 */
suffixtree *suffixtree_create( UChar *txt, size_t tlen, plugin_log *log )
{
    suffixtree *st;
    int i;

    // reading txt[tlen] through a NULL pointer is undefined behaviour
    if ( txt == NULL )
    {
        plugin_log_add(log,"suffixtree: text is NULL!");
        return NULL;
    }
    if ( txt[tlen] != 0 )
    {
        plugin_log_add(log,"suffixtree: text not null-terminated!");
        return NULL;
    }
    // calloc zero-fills the struct, so e, last and old_beta need no
    // further initialisation
    st = calloc( 1, sizeof *st );
    if ( st == NULL )
    {
        // report through the log like the other failure paths
        plugin_log_add(log,"suffixtree: failed to allocate tree");
        return NULL;
    }
    st->str = txt;
    st->slen = tlen;
    // actually build the tree
    st->root = node_create( 0, 0, log );
    if ( st->root != NULL )
    {
        st->f = node_create_leaf( 0, log );
        if ( st->f != NULL )
        {
            node_add_child( st->root, st->f, st->str, log );
            // one phase per character, 1-based up to and including tlen
            // NOTE(review): i is an int, so a tlen above INT_MAX would
            // overflow it - confirm callers never pass such lengths
            for ( i=1; (size_t)i<=tlen; i++ )
                phase( st, i, log );
            set_e( st, st->root, log );
        }
    }
    return st;
}