// Chain the entries of stree->rootchildren into one brother list.
//
// The table is scanned from index 0 up to and including LARGESTCHARINDEX.
// Every nonzero entry is counted in stree->alphasize. The first nonzero
// entry is stored (wrapped in MAKELARGE) as the child of the record at
// stree->branchtab; each later nonzero entry becomes the right brother of
// the nonzero entry found before it. The last entry found receives
// MAKELEAF(stree->textlen) as its right brother, and the leaf slot
// stree->leaftab[stree->textlen] is set to NILBIT.
//
// NOTE(review): if the table holds no nonzero entry, the code after the
// loop still runs with a previous reference of 0 -- confirm that callers
// never reach this function with an empty table.
static void linkrootchildren(Suffixtree *stree)
{
  Uint idx, child, last = 0, *lastbranch;

  stree->alphasize = 0;
  for(idx = 0; idx <= LARGESTCHARINDEX; idx++)
  {
    child = stree->rootchildren[idx];
    if(child == 0)
    {
      continue;                       // no edge for this character
    }
    stree->alphasize++;
    if(last == 0)
    {
      // first edge found: it becomes the child of the first branch record
      SETCHILD(stree->branchtab,MAKELARGE(child));
    } else if(ISLEAF(last))
    {
      // previous edge ends in a leaf: its leaftab slot holds the brother
      stree->leaftab[GETLEAFINDEX(last)] = child;
    } else
    {
      // previous edge ends in a branch node: use its brother field
      lastbranch = stree->branchtab + GETBRANCHINDEX(last);
      SETBROTHER(lastbranch,child);
    }
    last = child;
  }

  // the leaf numbered textlen is appended behind the last edge found
  if(ISLEAF(last))
  {
    stree->leaftab[GETLEAFINDEX(last)] = MAKELEAF(stree->textlen);
  } else
  {
    lastbranch = stree->branchtab + GETBRANCHINDEX(last);
    SETBROTHER(lastbranch,MAKELEAF(stree->textlen));
  }
  stree->leaftab[stree->textlen] = NILBIT;   // and it terminates the list
}
// Split the edge leading to stree->insertnode with a new branching node
// (number stree->nextfreebranchnum) and hang a new leaf (number
// stree->nextfreeleafnum) below it.
//
// Step 1 redirects the reference that pointed to the old node so that it
// points to the new branching node: the root table entry, the child field
// of the head node, or the brother field of stree->insertprev.
// Step 2 makes the old node and the new leaf the two children of the new
// branching node. The old node is placed first when the tail is exhausted
// (tailptr == sentinel) or when the character following headend is smaller
// than the character at tailptr; otherwise the new leaf is placed first.
// In both orders the new branching node takes over the old node's former
// right brother.
// Step 3 records depth and head position of the new node and advances the
// leaf counters.
static void insertbranchnode(Suffixtree *stree)
{
  Uint *prevleaf, *oldbranch, *oldleaf, oldbranchbrother;
  int oldnodefirst;

  spaceforbranchtab(stree);

  if(stree->headnodedepth == 0)
  {
    // head is the root: the root table refers to the new branch
    stree->rootchildren[(Uint) *(stree->headstart)]
      = MAKEBRANCHADDR(stree->nextfreebranchnum);
    *(stree->nextfreebranch+1) = VALIDINIT;
  } else if(stree->insertprev == 0)
  {
    // new branch = first child of the head node
    SETCHILD(stree->headnode,MAKEBRANCHADDR(stree->nextfreebranchnum));
  } else if(ISLEAF(stree->insertprev))
  {
    // new branch = right brother of a leaf
    prevleaf = stree->leaftab + GETLEAFINDEX(stree->insertprev);
    SETLEAFBROTHER(prevleaf,MAKEBRANCHADDR(stree->nextfreebranchnum));
  } else
  {
    // new branch = right brother of a branching node
    SETBROTHER(stree->branchtab + GETBRANCHINDEX(stree->insertprev),
               MAKEBRANCHADDR(stree->nextfreebranchnum));
  }

  // decide once which of the two children comes first
  oldnodefirst = (stree->tailptr == stree->sentinel ||
                  *(stree->headend+1) < *(stree->tailptr));

  if(ISLEAF(stree->insertnode))
  {
    // the split edge is a leaf edge
    oldleaf = stree->leaftab + GETLEAFINDEX(stree->insertnode);
    if(oldnodefirst)
    {
      // first child = old leaf, new branch inherits the old leaf's brother
      SETNEWCHILDBROTHER(MAKELARGE(stree->insertnode),
                         LEAFBROTHERVAL(*oldleaf));
      RECALLNEWLEAFADDRESS(stree->nextfreeleafptr);
      // new leaf = right brother of old leaf
      SETLEAFBROTHER(oldleaf,MAKELEAF(stree->nextfreeleafnum));
    } else
    {
      // first child = new leaf, new branch inherits the old leaf's brother
      SETNEWCHILDBROTHER(MAKELARGELEAF(stree->nextfreeleafnum),
                         LEAFBROTHERVAL(*oldleaf));
      // old leaf = right brother of new leaf
      *(stree->nextfreeleafptr) = stree->insertnode;
      RECALLLEAFADDRESS(oldleaf);
    }
  } else
  {
    // the split edge leads to a branching node
    oldbranch = stree->branchtab + GETBRANCHINDEX(stree->insertnode);
    oldbranchbrother = GETBROTHER(oldbranch);
    if(oldnodefirst)
    {
      // first child = old branch, new branch inherits its right brother
      SETNEWCHILDBROTHER(MAKELARGE(stree->insertnode),
                         oldbranchbrother);
      RECALLNEWLEAFADDRESS(stree->nextfreeleafptr);
      // new leaf = right brother of old branch
      SETBROTHER(oldbranch,MAKELEAF(stree->nextfreeleafnum));
    } else
    {
      // first child = new leaf, new branch inherits the old branch's brother
      SETNEWCHILDBROTHER(MAKELARGELEAF(stree->nextfreeleafnum),
                         oldbranchbrother);
      // old branch = right brother of new leaf
      *(stree->nextfreeleafptr) = stree->insertnode;
      RECALLBRANCHADDRESS(oldbranch);
    }
  }

  SETNILBIT;
  RECALLSUCC(MAKEBRANCHADDR(stree->nextfreebranchnum)); // node on succ. path

  // depth of the new node = depth of head node + length of headstart..headend
  stree->currentdepth = stree->headnodedepth
                        + (Uint) (stree->headend-stree->headstart+1);
  SETDEPTHEADPOS(stree->currentdepth,stree->nextfreeleafnum);
  SETMAXBRANCHDEPTH(stree->currentdepth);
  stree->nextfreeleafnum++;
  stree->nextfreeleafptr++;
}
// Append to ll one location for every edge leaving the root.
//
// For each character code ch in 0..UCHAR_MAX whose entry in
// stree->rootchildren differs from UNDEFINEDREFERENCE, one Simpleloc is
// written behind the current end of ll:
//   - leaf edge:   textpos = leaf index, remain = textlen - leaf index,
//                  nextnode refers to the leaf's slot in leaftab;
//   - branch edge: textpos = head position of the node, remain = depth - 1,
//                  nextnode refers to the node's record in branchtab.
//
// NOTE(review): unlike succlocationsstree, ll->nextfreeSimpleloc is not
// reset here, so results are appended to whatever ll already holds --
// confirm that callers expect this.
// NOTE(review): the loop bound is UCHAR_MAX and the empty marker is
// UNDEFINEDREFERENCE, while linkrootchildren uses LARGESTCHARINDEX and 0
// for the same table -- confirm that these agree.
void rootsucclocationsstree(Suffixtree *stree,ArraySimpleloc *ll)
{
  Uint headpos, leafindex, depth, distance, node, ch, *largeptr, *nodeptr;
  Simpleloc *entry;

  CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
  for(ch = 0; ch <= UCHAR_MAX; ch++)
  {
    node = stree->rootchildren[ch];
    if(node == UNDEFINEDREFERENCE)
    {
      continue;                       // no edge starting with ch
    }
    entry = ll->spaceSimpleloc + ll->nextfreeSimpleloc;
    ll->nextfreeSimpleloc++;
    if(ISLEAF(node))
    {
      leafindex = GETLEAFINDEX(node);
      entry->textpos = leafindex;
      entry->remain = stree->textlen - leafindex;
      entry->nextnode.toleaf = true;
      entry->nextnode.address = stree->leaftab + leafindex;
    } else
    {
      nodeptr = stree->branchtab + GETBRANCHINDEX(node);
      GETBOTH(depth,headpos,nodeptr);   // depth and head position of node
      entry->textpos = headpos;
      entry->remain = depth - 1;
      entry->nextnode.toleaf = false;
      entry->nextnode.address = nodeptr;
    }
    CHECKADDR(stree,entry->nextnode);
  }
}
// Insert the leaf numbered stree->nextfreeleafnum as a new child of the
// current head node.
//
// - Head is the root (headnodedepth == 0): unless the tail is exhausted
//   (tailptr == sentinel), the root table entry for the first tail
//   character is set to the new leaf and the leaf's slot is set to
//   VALIDINIT. With an exhausted tail nothing is linked here.
// - Otherwise the new leaf is spliced into the child list of the head node
//   directly behind stree->insertprev, or in front of the list when
//   insertprev is 0. The new leaf's slot receives the reference that used
//   to follow at that position.
// In every case the leaf is passed to RECALLSUCC and the leaf counters
// advance by one.
static void insertleaf(Suffixtree *stree)
{
  Uint *prevslot, newleaf;

  newleaf = MAKELEAF(stree->nextfreeleafnum);

  if(stree->headnodedepth == 0)
  {
    // head is the root
    if(stree->tailptr != stree->sentinel)    // no $-edge initially
    {
      stree->rootchildren[(Uint) *(stree->tailptr)] = newleaf;
      *(stree->nextfreeleafptr) = VALIDINIT;
    }
  } else if(stree->insertprev == 0)
  {
    // new leaf = first child; the former first child becomes its brother
    *(stree->nextfreeleafptr) = GETCHILD(stree->headnode);
    SETCHILD(stree->headnode,newleaf);
  } else if(ISLEAF(stree->insertprev))
  {
    // previous node is a leaf
    prevslot = stree->leaftab + GETLEAFINDEX(stree->insertprev);
    *(stree->nextfreeleafptr) = LEAFBROTHERVAL(*prevslot);
    SETLEAFBROTHER(prevslot,newleaf);
  } else
  {
    // previous node is a branching node
    prevslot = stree->branchtab + GETBRANCHINDEX(stree->insertprev);
    *(stree->nextfreeleafptr) = GETBROTHER(prevslot);
    SETBROTHER(prevslot,newleaf);
  }

  RECALLSUCC(newleaf);     // recall node on successor path of headnode
  stree->nextfreeleafnum++;
  stree->nextfreeleafptr++;
}
// Compute all locations that are one character below the location loc and
// store them in ll. ll is emptied first (nextfreeSimpleloc = 0).
//
// stree        the suffix tree
// nosentinel   if true, locations reached only via the final character of
//              a leaf edge are left out
// loc          the location to start from
// ll           receives the successor locations
//
// - loc->remain > 0: loc lies inside an edge, so there is exactly one
//   successor, one character further along the same edge. With nosentinel
//   set, a leaf edge with remain <= 1 yields no successor at all.
// - loc->remain == 0: loc is the branching node loc->nextnode.address.
//   Its child list is traversed and one location is produced per child;
//   with nosentinel set, leaf children with nothing remaining behind the
//   first edge character are skipped.
//
// The function writes nothing to stderr: the unconditional
// "succlocationsstree" trace line that used to be printed on every call
// has been removed.
void succlocationsstree(Suffixtree *stree,bool nosentinel,Simpleloc *loc,
                        ArraySimpleloc *ll)
{
  Uint succdepth, succ, leafindex, distance, depth, headpos,
       remain, *succptr, *largeptr, *nodeptr;
  Simpleloc *llptr;

  ll->nextfreeSimpleloc = 0;
  CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
  if(loc->remain > 0)
  {
    if(nosentinel && loc->nextnode.toleaf && loc->remain <= UintConst(1))
    {
      // at the end of a leaf edge: only the final character remains
      return;
    }
    // step one character further on the same edge
    llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
    llptr->textpos = loc->textpos + 1;
    llptr->remain = loc->remain - 1;
    llptr->nextnode.address = loc->nextnode.address;
    llptr->nextnode.toleaf = loc->nextnode.toleaf;
    CHECKADDR(stree,llptr->nextnode);
    return;
  }
  nodeptr = loc->nextnode.address;
  GETONLYDEPTH(depth,nodeptr);
  succ = GETCHILD(nodeptr);
  do                      // traverse the list of successors
  {
    if(ISLEAF(succ))      // successor is leaf
    {
      leafindex = GETLEAFINDEX(succ);
      remain = stree->textlen - (depth + leafindex);
      if(!nosentinel || remain >= UintConst(1))
      {
        llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
        llptr->remain = remain;
        llptr->textpos = depth + leafindex;
        llptr->nextnode.address = stree->leaftab + leafindex;
        llptr->nextnode.toleaf = true;
        CHECKADDR(stree,llptr->nextnode);
      }
      succ = LEAFBROTHERVAL(stree->leaftab[leafindex]);
    } else                // successor is branch node
    {
      succptr = stree->branchtab + GETBRANCHINDEX(succ);
      GETBOTH(succdepth,headpos,succptr);  // get info for branch node
      llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
      llptr->textpos = depth + headpos;
      llptr->remain = succdepth - depth - 1;
      llptr->nextnode.toleaf = false;
      llptr->nextnode.address = succptr;
      CHECKADDR(stree,llptr->nextnode);
      succ = GETBROTHER(succptr);
    }
  } while(!NILPTR(succ));
}
// Convert the integer node reference i into the Reference *ref.
// A leaf reference yields toleaf = true and the address of the leaf's slot
// in stree->leaftab; any other reference yields toleaf = false and the
// address of the node's record in stree->branchtab.
static void int2ref(Suffixtree *stree,Reference *ref,Uint i)
{
  if(ISLEAF(i))
  {
    ref->address = stree->leaftab + GETLEAFINDEX(i);
    ref->toleaf = true;
    return;
  }
  ref->address = stree->branchtab + GETBRANCHINDEX(i);
  ref->toleaf = false;
}
// Starting at the head node, follow the tail (the text from stree->tailptr
// up to stree->sentinel) downward as far as it matches, comparing edge
// labels character by character via taillcp. stree->tailptr is advanced
// over every matched character.
//
// Results:
// - The walk ends exactly at a node that has no edge for the next tail
//   character (or the tail is exhausted): headend is set to NULL and,
//   below the root, insertprev is the child after which a new edge has to
//   be inserted (0 = in front of the list).
// - The walk ends inside an edge: headstart..headend delimit the matched
//   part of that edge label, insertnode is the node the edge leads to and,
//   below the root, insertprev is its left brother (0 if none).
// Children of a non-root node are scanned in list order and the scan stops
// at the first edge whose first character is >= the tail character.
static void scanprefix(Suffixtree *stree)
{
  Uint *nodeptr = NULL, *largeptr = NULL, distance = 0,
       node, prevnode, leafindex, headposition, nodedepth, edgelen, matchlen;
  SYMBOL *edgestart = (SYMBOL *) NULL, tailchar, edgechar = 0;

  if(stree->headnodedepth == 0)
  {
    // headnode is the root: the edge is found by table lookup
    if(stree->tailptr == stree->sentinel)
    {
      stree->headend = NULL;          // there is no $-edge
      return;
    }
    tailchar = *(stree->tailptr);
    node = stree->rootchildren[(Uint) tailchar];
    if(node == 0)
    {
      stree->headend = NULL;          // no edge for tailchar
      return;
    }
    if(ISLEAF(node))
    {
      // successor edge is a leaf edge: compare tail and leaf edge label
      edgestart = stree->text + GETLEAFINDEX(node);
      matchlen = 1 + taillcp(stree,edgestart+1,stree->sentinel-1);
      stree->tailptr += matchlen;
      stree->headstart = edgestart;
      stree->headend = edgestart + (matchlen-1);
      stree->insertnode = node;
      return;
    }
    nodeptr = stree->branchtab + GETBRANCHINDEX(node);
    GETBOTH(nodedepth,headposition,nodeptr);   // get info for branch node
    edgestart = stree->text + headposition;
    matchlen = 1 + taillcp(stree,edgestart+1,edgestart + nodedepth - 1);
    stree->tailptr += matchlen;
    if(nodedepth > matchlen)
    {
      // cannot reach the successor node: fall out of the tree on this edge
      stree->headstart = edgestart;
      stree->headend = edgestart + (matchlen-1);
      stree->insertnode = node;
      return;
    }
    stree->headnode = nodeptr;
    stree->headnodedepth = nodedepth;
  }

  for(;;)
  {
    // headnode is not the root
    prevnode = 0;
    node = GETCHILD(stree->headnode);

    if(stree->tailptr == stree->sentinel)
    {
      // tail exhausted and there is no $-edge: find the last successor,
      // the new edge becomes its right brother
      do
      {
        prevnode = node;
        if(ISLEAF(node))
        {
          node = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(node)]);
        } else
        {
          node = GETBROTHER(stree->branchtab + GETBRANCHINDEX(node));
        }
      } while(!NILPTR(node));
      stree->insertnode = NILBIT;
      stree->insertprev = prevnode;
      stree->headend = NULL;
      return;
    }

    tailchar = *(stree->tailptr);
    do
    {
      // find successor edge with first character = tailchar
      if(ISLEAF(node))
      {
        // successor is a leaf
        leafindex = GETLEAFINDEX(node);
        edgestart = stree->text + (stree->headnodedepth + leafindex);
        edgechar = *edgestart;
        if(edgechar >= tailchar)
        {
          break;                      // edge will not come later
        }
        prevnode = node;
        node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
      } else
      {
        // successor is a branch node
        nodeptr = stree->branchtab + GETBRANCHINDEX(node);
        GETONLYHEADPOS(headposition,nodeptr);
        edgestart = stree->text + (stree->headnodedepth + headposition);
        edgechar = *edgestart;
        if(edgechar >= tailchar)
        {
          break;                      // edge will not come later
        }
        prevnode = node;
        node = GETBROTHER(nodeptr);
      }
    } while(!NILPTR(node));

    if(NILPTR(node) || edgechar > tailchar)
    {
      // edge not found
      stree->insertprev = prevnode;   // new edge will become brother of this
      stree->headend = NULL;
      return;
    }

    if(ISLEAF(node))
    {
      // correct edge is a leaf edge: compare its label with the tail
      matchlen = 1 + taillcp(stree,edgestart+1,stree->sentinel-1);
      stree->tailptr += matchlen;
      stree->headstart = edgestart;
      stree->headend = edgestart + (matchlen-1);
      stree->insertnode = node;
      stree->insertprev = prevnode;
      return;
    }

    GETDEPTHAFTERHEADPOS(nodedepth,nodeptr);   // headposition already known
    edgelen = nodedepth - stree->headnodedepth;
    matchlen = 1 + taillcp(stree,edgestart+1,edgestart + edgelen - 1);
    stree->tailptr += matchlen;
    if(edgelen > matchlen)
    {
      // cannot reach the next node
      stree->headstart = edgestart;
      stree->headend = edgestart + (matchlen-1);
      stree->insertnode = node;
      stree->insertprev = prevnode;
      return;
    }
    stree->headnode = nodeptr;        // edge fully matched: descend
    stree->headnodedepth = nodedepth;
  }
}
// Starting at the head node, follow the string headstart..headend downward.
// Only the first character of each edge is compared with the current first
// character of the string; whole edges are then skipped by their length.
// The string is assumed to be nonempty and to have a matching edge at
// every step (the child-list loop has no end-of-list test).
//
// Results:
// - The string ends exactly at a branching node: headnode/headnodedepth
//   describe that node and headend is set to NULL.
// - The string ends inside an edge: insertnode is the node the edge leads
//   to (a leaf edge always stops the walk) and, below the root, insertprev
//   is the left brother of that node (0 if it is the first child).
//   headstart has been advanced past all edges that were skipped.
static void rescan (Suffixtree *stree)
{
  Uint *nodeptr, *largeptr = NULL, distance = 0, node, prevnode,
       nodedepth, edgelen, headlen, leafindex, headposition;
  SYMBOL headchar, edgechar;

  if(stree->headnodedepth == 0)
  {
    // head is the root: the edge is found by table lookup
    headchar = *(stree->headstart);   // headstart is assumed to be not empty
    node = stree->rootchildren[(Uint) headchar];
    if(ISLEAF(node))
    {
      stree->insertnode = node;       // stop if successor is a leaf
      return;
    }
    nodeptr = stree->branchtab + GETBRANCHINDEX(node);
    GETONLYDEPTH(nodedepth,nodeptr);
    headlen = (Uint) (stree->headend - stree->headstart + 1);
    if(nodedepth > headlen)
    {
      stree->insertnode = node;       // cannot reach the successor node
      return;
    }
    stree->headnode = nodeptr;        // go to successor node
    stree->headnodedepth = nodedepth;
    if(nodedepth == headlen)
    {
      stree->headend = NULL;          // location has been scanned
      return;
    }
    stree->headstart += nodedepth;
  }

  for(;;)
  {
    // headnode is not the root
    headchar = *(stree->headstart);   // headstart is assumed to be nonempty
    prevnode = 0;
    node = GETCHILD(stree->headnode);

    for(;;)
    {
      // traverse the list of successors
      if(ISLEAF(node))
      {
        // successor is a leaf
        leafindex = GETLEAFINDEX(node);
        edgechar = stree->text[stree->headnodedepth + leafindex];
        if(edgechar == headchar)
        {
          // correct edge found; a leaf edge ends the walk
          stree->insertnode = node;
          stree->insertprev = prevnode;
          return;
        }
        prevnode = node;
        node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
      } else
      {
        // successor is a branch node
        nodeptr = stree->branchtab + GETBRANCHINDEX(node);
        GETONLYHEADPOS(headposition,nodeptr);
        edgechar = stree->text[stree->headnodedepth + headposition];
        if(edgechar == headchar)
        {
          break;                      // correct edge found
        }
        prevnode = node;
        node = GETBROTHER(nodeptr);
      }
    }

    GETDEPTHAFTERHEADPOS(nodedepth,nodeptr);   // get info about succ node
    edgelen = nodedepth - stree->headnodedepth;
    headlen = (Uint) (stree->headend - stree->headstart + 1);
    if(edgelen > headlen)
    {
      // cannot reach the succ node
      stree->insertnode = node;
      stree->insertprev = prevnode;
      return;
    }
    stree->headnode = nodeptr;        // go to the successor node
    stree->headnodedepth = nodedepth;
    if(edgelen == headlen)
    {
      stree->headend = NULL;          // location is found
      return;
    }
    stree->headstart += edgelen;
  }
}