/*
  Link the leaf with number stree->nextfreeleafnum into the tree and
  advance the next-free-leaf bookkeeping.

  - Head node is the root (headnodedepth == 0): unless the tail pointer
    is at the sentinel, the leaf is stored in rootchildren under the
    first tail character and the slot at nextfreeleafptr is set to
    VALIDINIT. At the sentinel nothing is linked.
  - Otherwise the leaf is spliced into the child list of headnode:
    as first child when insertprev is 0, else directly after the node
    insertprev (which may be a leaf or a branch node). The slot at
    nextfreeleafptr receives the reference the predecessor used to hold.

  In every case RECALLSUCC(newleaf) is invoked and both nextfreeleafnum
  and nextfreeleafptr are incremented.
*/
static void insertleaf(Suffixtree *stree)
{
  Uint newleaf = MAKELEAF(stree->nextfreeleafnum);

  if(stree->headnodedepth != 0)
  {
    if(stree->insertprev == 0)
    {
      // no left sibling: the new leaf becomes the first child
      *(stree->nextfreeleafptr) = GETCHILD(stree->headnode);
      SETCHILD(stree->headnode,newleaf);
    } else if(ISLEAF(stree->insertprev))
    {
      // left sibling is a leaf: take over its brother reference
      Uint *prevleaf = stree->leaftab + GETLEAFINDEX(stree->insertprev);

      *(stree->nextfreeleafptr) = LEAFBROTHERVAL(*prevleaf);
      SETLEAFBROTHER(prevleaf,newleaf);
    } else
    {
      // left sibling is a branching node: take over its brother reference
      Uint *prevbranch = stree->branchtab + GETBRANCHINDEX(stree->insertprev);

      *(stree->nextfreeleafptr) = GETBROTHER(prevbranch);
      SETBROTHER(prevbranch,newleaf);
    }
  } else if(stree->tailptr != stree->sentinel)
  {
    // head is the root; there is no \$-edge initially
    stree->rootchildren[(Uint) *(stree->tailptr)] = newleaf;
    *(stree->nextfreeleafptr) = VALIDINIT;
  }

  RECALLSUCC(newleaf);     // recall node on successor path of headnode
  stree->nextfreeleafnum++;
  stree->nextfreeleafptr++;
}
/*
  Fill ll with the locations that are reached from loc by reading one
  more character. ll is emptied first and space for alphasize+1 entries
  is requested via CHECKARRAYSPACE.

  - loc->remain > 0: loc lies inside an edge, so there is exactly one
    candidate, one character further on the same edge (textpos + 1,
    remain - 1, same nextnode). It is omitted when nosentinel is set,
    the edge leads to a leaf and remain <= 1.
  - loc->remain == 0: loc is at the branch node loc->nextnode.address;
    one entry per child is stored. For a leaf child remain is
    textlen - (depth + leafindex) and the entry is skipped when
    nosentinel is set and that value is 0. For a branch child remain is
    succdepth - depth - 1.

  NOTE(review): the unconditional trace line written to stderr looks
  like leftover debugging output - confirm before removing.
*/
void succlocationsstree(Suffixtree *stree,bool nosentinel,Simpleloc *loc,
                        ArraySimpleloc *ll)
{
  // distance and largeptr are not referenced directly below.
  // NOTE(review): presumably scratch variables used inside GETONLYDEPTH
  // and GETBOTH - confirm before removing them.
  Uint succdepth, succ, leafindex, distance, depth, headpos,
       remain, *succptr, *largeptr, *nodeptr;
  Simpleloc *llptr;

  fprintf(stderr,"succlocationsstree\n");
  ll->nextfreeSimpleloc = 0;
  CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);

  if(loc->remain > 0)
  {
    // skipped case: at the end of a leaf edge where only \$ remains
    if(!nosentinel || !loc->nextnode.toleaf || loc->remain > UintConst(1))
    {
      llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc++];
      llptr->textpos = loc->textpos + 1;
      llptr->remain = loc->remain - 1;
      llptr->nextnode.address = loc->nextnode.address;
      llptr->nextnode.toleaf = loc->nextnode.toleaf;
      CHECKADDR(stree,llptr->nextnode);
    }
    return;
  }

  nodeptr = loc->nextnode.address;
  GETONLYDEPTH(depth,nodeptr);
  succ = GETCHILD(nodeptr);
  for(;;)                      // traverse the list of successors
  {
    if(!ISLEAF(succ))          // successor is branch node
    {
      succptr = stree->branchtab + GETBRANCHINDEX(succ);
      GETBOTH(succdepth,headpos,succptr);   // get info for branch node
      llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc++];
      llptr->textpos = depth + headpos;
      llptr->remain = succdepth - depth - 1;
      llptr->nextnode.toleaf = false;
      llptr->nextnode.address = succptr;
      CHECKADDR(stree,llptr->nextnode);
      succ = GETBROTHER(succptr);
    } else                     // successor is leaf
    {
      leafindex = GETLEAFINDEX(succ);
      remain = stree->textlen - (depth + leafindex);
      if(!(nosentinel && remain < UintConst(1)))
      {
        llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc++];
        llptr->remain = remain;
        llptr->textpos = depth + leafindex;
        llptr->nextnode.address = stree->leaftab + leafindex;
        llptr->nextnode.toleaf = true;
        CHECKADDR(stree,llptr->nextnode);
      }
      succ = LEAFBROTHERVAL(stree->leaftab[leafindex]);
    }
    if(NILPTR(succ))
    {
      break;
    }
  }
}
/*
  Store in branchinfo the pieces of information about the branch node
  btptr that are selected by the bit mask whichinfo:

    ACCESSDEPTH          -> branchinfo->depth
    ACCESSHEADPOS        -> branchinfo->headposition
    ACCESSSUFFIXLINK     -> branchinfo->suffixlink (also forces the depth
                            to be computed, since the large-node lookup
                            takes it as an argument)
    ACCESSFIRSTCHILD     -> branchinfo->firstchild
    ACCESSBRANCHBROTHER  -> branchinfo->branchbrother; only the address
                            field is written (NULL) if there is no brother

  Depth and head position are derived in one of three ways: from the
  distance to nextfreebranch when btptr lies at or beyond
  stree->chainstart, from the distance to the end of its chain when the
  node is not large, or read directly when the node is large.
*/
void getbranchinfostree(Suffixtree *stree,Uint whichinfo,
                        Branchinfo *branchinfo,Bref btptr)
{
  Uint which = whichinfo, node, distance, *largeptr;

  if(which & ACCESSSUFFIXLINK)
  {
    which |= ACCESSDEPTH;
  }

  if(which & (ACCESSDEPTH | ACCESSHEADPOS))
  {
    if(stree->chainstart != NULL && btptr >= stree->chainstart)
    {
      // node belongs to the chain currently being built: both values
      // are stored here, regardless of which of them was requested
      distance = DIVBYSMALLINTS((Uint) (stree->nextfreebranch - btptr));
      branchinfo->depth = stree->currentdepth + distance;
      branchinfo->headposition = stree->nextfreeleafnum - distance;
    } else if(!ISLARGE(*btptr))
    {
      // small node: derive the values from the node ending its chain
      distance = GETDISTANCE(btptr);
      GETCHAINEND(largeptr,btptr,distance);
      if(which & ACCESSDEPTH)
      {
        branchinfo->depth = GETDEPTH(largeptr) + distance;
      }
      if(which & ACCESSHEADPOS)
      {
        branchinfo->headposition = GETHEADPOS(largeptr) - distance;
      }
    } else
    {
      // large node: the values are stored with the node itself
      if(which & ACCESSDEPTH)
      {
        branchinfo->depth = GETDEPTH(btptr);
      }
      if(which & ACCESSHEADPOS)
      {
        branchinfo->headposition = GETHEADPOS(btptr);
      }
    }
  }

  if(which & ACCESSSUFFIXLINK)
  {
    if((stree->chainstart == NULL || btptr < stree->chainstart) &&
       ISLARGE(*btptr))
    {
      branchinfo->suffixlink
        = stree->branchtab + getlargelinkstree(stree,btptr,branchinfo->depth);
    } else
    {
      // chain node or small node: the link target is the next record
      branchinfo->suffixlink = btptr + SMALLINTS;
    }
  }

  if(which & ACCESSFIRSTCHILD)
  {
    int2ref(stree,&(branchinfo->firstchild),GETCHILD(btptr));
  }

  if(which & ACCESSBRANCHBROTHER)
  {
    node = GETBROTHER(btptr);
    if(!NILPTR(node))
    {
      int2ref(stree,&(branchinfo->branchbrother),node);
    } else
    {
      branchinfo->branchbrother.address = NULL;
    }
  }
}
/*
  Scan down the tree from the current head node, character by character
  along the tail, and record where the scan leaves the tree.

  Each time an edge is followed, stree->tailptr is advanced by the number
  of matched characters (1 + the value returned by taillcp;
  NOTE(review): taillcp presumably returns the length of the common
  prefix of the tail and the given text range - confirm).

  On return one of the following holds:
  - stree->headend == NULL: the scan stopped at the node
    stree->headnode, either because no edge starts with the next tail
    character or because the tail pointer reached the sentinel. Below a
    non-root head node, stree->insertprev is set to the child after which
    a new edge has to be inserted (0 if it has to become the first
    child); in the sentinel case stree->insertnode is set to NILBIT.
  - stree->headend != NULL: the scan stopped inside an edge.
    headstart..headend delimit the matched part of the edge label in the
    text, stree->insertnode is the node the edge leads to, and, below a
    non-root head node, stree->insertprev is its left sibling (or 0).
*/
static void scanprefix(Suffixtree *stree)
{
  // largeptr and distance are not referenced directly in this function.
  // NOTE(review): presumably scratch variables used inside GETBOTH,
  // GETONLYHEADPOS and GETDEPTHAFTERHEADPOS - confirm before removing.
  Uint *nodeptr = NULL, *largeptr = NULL, leafindex, nodedepth, edgelen, node,
       distance = 0, prevnode, prefixlen, headposition;
  SYMBOL *leftborder = (SYMBOL *) NULL, tailchar, edgechar = 0;

  if(stree->headnodedepth == 0)   // headnode is root
  {
    if(stree->tailptr == stree->sentinel)   // there is no \$-edge
    {
      stree->headend = NULL;
      return;
    }
    tailchar = *(stree->tailptr);
    // the children of the root are looked up directly by first character;
    // 0 marks a character without an outgoing edge
    if((node = stree->rootchildren[(Uint) tailchar]) == 0)
    {
      stree->headend = NULL;
      return;
    }
    if(ISLEAF(node))  // successor edge is leaf, compare tail and leaf edge label
    {
      leftborder = stree->text + GETLEAFINDEX(node);
      prefixlen = 1 + taillcp(stree,leftborder+1,stree->sentinel-1);
      (stree->tailptr) += prefixlen;
      stree->headstart = leftborder;
      stree->headend = leftborder + (prefixlen-1);
      stree->insertnode = node;
      return;
    }
    nodeptr = stree->branchtab + GETBRANCHINDEX(node);
    GETBOTH(nodedepth,headposition,nodeptr);  // get info for branch node
    leftborder = stree->text + headposition;
    prefixlen = 1 + taillcp(stree,leftborder+1,leftborder + nodedepth - 1);
    (stree->tailptr)+= prefixlen;
    if(nodedepth > prefixlen)   // cannot reach the successor, fall out of tree
    {
      stree->headstart = leftborder;
      stree->headend = leftborder + (prefixlen-1);
      stree->insertnode = node;
      return;
    }
    // the whole edge matched: continue the scan below this branch node
    stree->headnode = nodeptr;
    stree->headnodedepth = nodedepth;
  }
  while(True)   // headnode is not the root
  {
    prevnode = 0;
    node = GETCHILD(stree->headnode);
    if(stree->tailptr == stree->sentinel)   // \$-edge
    {
      do // there is no \$-edge, so find last successor, of which it becomes right brother
      {
        prevnode = node;
        if(ISLEAF(node))
        {
          node = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(node)]);
        } else
        {
          node = GETBROTHER(stree->branchtab + GETBRANCHINDEX(node));
        }
      } while(!NILPTR(node));
      stree->insertnode = NILBIT;
      stree->insertprev = prevnode;
      stree->headend = NULL;
      return;
    }
    tailchar = *(stree->tailptr);
    // The scan stops at the first child whose edge starts with a character
    // >= tailchar. NOTE(review): this relies on the child list being
    // ordered by first edge character - confirm.
    do // find successor edge with firstchar = tailchar
    {
      if(ISLEAF(node))   // successor is leaf
      {
        leafindex = GETLEAFINDEX(node);
        leftborder = stree->text + (stree->headnodedepth + leafindex);
        if((edgechar = *leftborder) >= tailchar)   // edge will not come later
        {
          break;
        }
        prevnode = node;
        node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
      } else   // successor is branch node
      {
        nodeptr = stree->branchtab + GETBRANCHINDEX(node);
        GETONLYHEADPOS(headposition,nodeptr);
        leftborder = stree->text + (stree->headnodedepth + headposition);
        if((edgechar = *leftborder) >= tailchar)  // edge will not come later
        {
          break;
        }
        prevnode = node;
        node = GETBROTHER(nodeptr);
      }
    } while(!NILPTR(node));
    if(NILPTR(node) || edgechar > tailchar)  // edge not found
    {
      stree->insertprev = prevnode;   // new edge will become brother of this
      stree->headend = NULL;
      return;
    }
    if(ISLEAF(node))  // correct edge is leaf edge, compare its label with tail
    {
      prefixlen = 1 + taillcp(stree,leftborder+1,stree->sentinel-1);
      (stree->tailptr) += prefixlen;
      stree->headstart = leftborder;
      stree->headend = leftborder + (prefixlen-1);
      stree->insertnode = node;
      stree->insertprev = prevnode;
      return;
    }
    GETDEPTHAFTERHEADPOS(nodedepth,nodeptr); // we already know headposition
    edgelen = nodedepth - stree->headnodedepth;
    prefixlen = 1 + taillcp(stree,leftborder+1,leftborder + edgelen - 1);
    (stree->tailptr) += prefixlen;
    if(edgelen > prefixlen)  // cannot reach next node
    {
      stree->headstart = leftborder;
      stree->headend = leftborder + (prefixlen-1);
      stree->insertnode = node;
      stree->insertprev = prevnode;
      return;
    }
    // the whole edge matched: descend and continue with the next edge
    stree->headnode = nodeptr;
    stree->headnodedepth = nodedepth;
  }
}
/*
  Rescan the string delimited by stree->headstart and stree->headend
  downward from the current head node. Only the first character of each
  edge is compared with the text; whole edges are then skipped by
  comparing the edge length with the number of characters still to be
  rescanned (wlen).

  On return one of the following holds:
  - stree->headend == NULL: the string ends exactly at the node
    stree->headnode.
  - otherwise the string ends inside an edge: stree->insertnode is the
    node that edge leads to, and, below a non-root head node,
    stree->insertprev is its left sibling (0 if it is the first child).
    stree->headstart has been advanced past all edges that were skipped
    completely.

  The sibling traversal has no end-of-list check: the code assumes that
  an edge starting with the required character exists.
*/
static void rescan (Suffixtree *stree)
{
  // largeptr and distance are not referenced directly in this function.
  // NOTE(review): presumably scratch variables used inside GETONLYDEPTH,
  // GETONLYHEADPOS and GETDEPTHAFTERHEADPOS - confirm before removing.
  Uint *nodeptr, *largeptr = NULL, distance = 0, node, prevnode, nodedepth,
       edgelen, wlen, leafindex, headposition;
  SYMBOL headchar, edgechar;

  if(stree->headnodedepth == 0)   // head is the root
  {
    headchar = *(stree->headstart);  // headstart is assumed to be not empty
    // the children of the root are looked up directly by first character
    node = stree->rootchildren[(Uint) headchar];
    if(ISLEAF(node))   // stop if successor is leaf
    {
      stree->insertnode = node;
      return;
    }
    nodeptr = stree->branchtab + GETBRANCHINDEX(node);
    GETONLYDEPTH(nodedepth,nodeptr);
    wlen = (Uint) (stree->headend - stree->headstart + 1);
    if(nodedepth > wlen)    // cannot reach the successor node
    {
      stree->insertnode = node;
      return;
    }
    stree->headnode = nodeptr;        // go to successor node
    stree->headnodedepth = nodedepth;
    if(nodedepth == wlen)             // location has been scanned
    {
      stree->headend = NULL;
      return;
    }
    (stree->headstart) += nodedepth;
  }
  while(True)   // headnode is not the root
  {
    headchar = *(stree->headstart);  // headstart is assumed to be nonempty
    prevnode = 0;
    node = GETCHILD(stree->headnode);
    while(True)             // traverse the list of successors
    {
      if(ISLEAF(node))   // successor is leaf
      {
        leafindex = GETLEAFINDEX(node);
        edgechar = stree->text[stree->headnodedepth + leafindex];
        if(edgechar == headchar)    // correct edge found
        {
          stree->insertnode = node;
          stree->insertprev = prevnode;
          return;
        }
        prevnode = node;
        node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
      } else   // successor is branch node
      {
        nodeptr = stree->branchtab + GETBRANCHINDEX(node);
        GETONLYHEADPOS(headposition,nodeptr);
        edgechar = stree->text[stree->headnodedepth + headposition];
        if(edgechar == headchar)   // correct edge found
        {
          break;
        }
        prevnode = node;
        node = GETBROTHER(nodeptr);
      }
    }
    GETDEPTHAFTERHEADPOS(nodedepth,nodeptr);  // get info about successor node
    edgelen = nodedepth - stree->headnodedepth;
    wlen = (Uint) (stree->headend - stree->headstart + 1);
    if(edgelen > wlen)     // cannot reach the successor node
    {
      stree->insertnode = node;
      stree->insertprev = prevnode;
      return;
    }
    stree->headnode = nodeptr;    // go to the successor node
    stree->headnodedepth = nodedepth;
    if(edgelen == wlen)    // location is found
    {
      stree->headend = NULL;
      return;
    }
    (stree->headstart) += edgelen;
  }
}
Sint depthfirststree(Suffixtree *stree,Reference *startnode, Sint (*processleaf)(Uint,Bref,void *), bool (*processbranch1)(Bref,void *), Sint (*processbranch2)(Bref,void *), bool (*stoptraversal)(void *),void *stopinfo,void *info) { bool godown = true, readyforpop = false; Uint child, brotherval; Bref lcpnode = NULL; Reference currentnode; ArrayBref stack; if(startnode->toleaf) { if(processleaf((Uint) (startnode->address - stree->leaftab),NULL,info) != 0) { return -1; } return 0; } if(stoptraversal != NULL && stoptraversal(stopinfo)) { return 0; } currentnode.toleaf = false; currentnode.address = startnode->address; INITARRAY(&stack,Bref); STOREINARRAY(&stack,Bref,128,currentnode.address); SETCURRENT(GETCHILD(currentnode.address)); if(processbranch1 == NULL) { #define PROCESSBRANCH1(A,B) /* Nothing */ #define PROCESSBRANCH2(A,B) godown = true while(true) { if(stoptraversal != NULL && stoptraversal(stopinfo)) { return 0; } if(currentnode.toleaf) { /*fprintf(stderr,"visit leaf %lu ", (long unsigned int) LEAFADDR2NUM(stree,currentnode.address)); fprintf(stderr,"below %lu ",(long unsigned int) BRADDR2NUM(stree,startnode->address)); fprintf(stderr,"depth %lu\n",stree->currentdepth);*/ if(processleaf(LEAFADDR2NUM(stree,currentnode.address),lcpnode,info) != 0) { return -1; } brotherval = LEAFBROTHERVAL(*(currentnode.address)); if(NILPTR(brotherval)) { readyforpop = true; currentnode.toleaf = false; } else { SETCURRENT(brotherval); // current comes from brother lcpnode = stack.spaceBref[stack.nextfreeBref-1]; } } else { if(readyforpop) { if(stack.nextfreeBref == UintConst(1)) { break; } (stack.nextfreeBref)--; /*fprintf(stderr,"#pop[%lu]=",(long unsigned int) stack.nextfreeBref); fprintf(stderr,"%lu\n", (long unsigned int) BRADDR2NUM(stree,stack.spaceBref[stack.nextfreeBref]));*/ PROCESSBRANCH2(stack.spaceBref[stack.nextfreeBref],info); brotherval = GETBROTHER(stack.spaceBref[stack.nextfreeBref]); if(!NILPTR(brotherval)) { SETCURRENT(brotherval); // current comes from 
brother lcpnode = stack.spaceBref[stack.nextfreeBref-1]; readyforpop = false; } } else { /*fprintf(stderr,"#process1 %lu\n", (long unsigned int) BRADDR2NUM(stree,currentnode.address));*/ PROCESSBRANCH1(currentnode.address,info); if(godown) { STOREINARRAY(&stack,Bref,128,currentnode.address); /*fprintf(stderr,"#push[%lu]=",(long unsigned int) (stack.nextfreeBref-1)); fprintf(stderr,"%lu\n",(long unsigned int) BRADDR2NUM(stree,currentnode.address));*/ child = GETCHILD(currentnode.address); SETCURRENT(child); // current comes from child } else { brotherval = GETBROTHER(currentnode.address); if(NILPTR(brotherval)) { readyforpop = true; } else { SETCURRENT(brotherval); // current comes brother } } } } } } else { #undef PROCESSBRANCH1 #undef PROCESSBRANCH2 #define PROCESSBRANCH1(A,B) godown = processbranch1(A,B) #define PROCESSBRANCH2(A,B) if(processbranch2(A,B) != 0)\ {\ return -2;\ } while(true) { if(stoptraversal != NULL && stoptraversal(stopinfo)) { return 0; } if(currentnode.toleaf) { /*fprintf(stderr,"visit leaf %lu ", (long unsigned int) LEAFADDR2NUM(stree,currentnode.address)); fprintf(stderr,"below %lu ",(long unsigned int) BRADDR2NUM(stree,startnode->address)); fprintf(stderr,"depth %lu\n",stree->currentdepth);*/ if(processleaf(LEAFADDR2NUM(stree,currentnode.address),lcpnode,info) != 0) { return -1; } brotherval = LEAFBROTHERVAL(*(currentnode.address)); if(NILPTR(brotherval)) { readyforpop = true; currentnode.toleaf = false; } else { SETCURRENT(brotherval); // current comes from brother lcpnode = stack.spaceBref[stack.nextfreeBref-1]; } } else { if(readyforpop) { if(stack.nextfreeBref == UintConst(1)) { break; } (stack.nextfreeBref)--; /*fprintf(stderr,"#pop[%lu]=",(long unsigned int) stack.nextfreeBref); fprintf(stderr,"%lu\n", (long unsigned int) BRADDR2NUM(stree,stack.spaceBref[stack.nextfreeBref]));*/ PROCESSBRANCH2(stack.spaceBref[stack.nextfreeBref],info); brotherval = GETBROTHER(stack.spaceBref[stack.nextfreeBref]); if(!NILPTR(brotherval)) { 
SETCURRENT(brotherval); // current comes from brother lcpnode = stack.spaceBref[stack.nextfreeBref-1]; readyforpop = false; } } else { /*fprintf(stderr,"#process1 %lu\n", (long unsigned int) BRADDR2NUM(stree,currentnode.address));*/ PROCESSBRANCH1(currentnode.address,info); if(godown) { STOREINARRAY(&stack,Bref,128,currentnode.address); /*fprintf(stderr,"#push[%lu]=",(long unsigned int) (stack.nextfreeBref-1)); fprintf(stderr,"%lu\n",(long unsigned int) BRADDR2NUM(stree,currentnode.address));*/ child = GETCHILD(currentnode.address); SETCURRENT(child); // current comes from child } else { brotherval = GETBROTHER(currentnode.address); if(NILPTR(brotherval)) { readyforpop = true; } else { SETCURRENT(brotherval); // current comes brother } } } } } } FREEARRAY(&stack,Bref); return 0; }