Beispiel #1
0
/*
 * Turn the per-character table stree->rootchildren into a sibling chain.
 *
 * Every non-zero table entry is counted in stree->alphasize. The first such
 * entry is installed as the child of the node at stree->branchtab; each
 * later entry is stored as the brother of the entry found before it. The
 * last entry gets MAKELEAF(stree->textlen) as its brother, and the leaf
 * table slot at index stree->textlen is set to NILBIT.
 */
static void linkrootchildren(Suffixtree *stree) {
    Uint idx, child, last = 0, *lastnodeptr;

    stree->alphasize = 0;
    for(idx = 0; idx <= LARGESTCHARINDEX; idx++) {
        child = stree->rootchildren[idx];
        if(child == 0) {
            continue;               // no edge for this character
        }
        stree->alphasize++;
        if(last == 0) {
            // first edge seen: becomes the child of the first branch node
            SETCHILD(stree->branchtab,MAKELARGE(child));
        } else if(ISLEAF(last)) {
            // a leaf keeps its brother reference directly in leaftab
            stree->leaftab[GETLEAFINDEX(last)] = child;
        } else {
            lastnodeptr = stree->branchtab + GETBRANCHINDEX(last);
            SETBROTHER(lastnodeptr,child);
        }
        last = child;
    }

    // the last entry of the chain is followed by the leaf numbered textlen
    if(ISLEAF(last)) {
        stree->leaftab[GETLEAFINDEX(last)] = MAKELEAF(stree->textlen);
    } else {
        lastnodeptr = stree->branchtab + GETBRANCHINDEX(last);
        SETBROTHER(lastnodeptr,MAKELEAF(stree->textlen));
    }
    stree->leaftab[stree->textlen] = NILBIT;
}
Beispiel #2
0
/*
 * Insert a new branching node (number stree->nextfreebranchnum) on the edge
 * leading to stree->insertnode, together with a new leaf (number
 * stree->nextfreeleafnum) hanging below it.
 *
 * Steps, in the order they are performed:
 *   1. spaceforbranchtab() is called first (defined elsewhere; presumably
 *      guarantees room in the branch table -- confirm against its definition).
 *   2. The new branch node is hooked in where stree->insertnode used to be:
 *      in stree->rootchildren when the head node has depth 0, otherwise as
 *      first child of stree->headnode or as brother of stree->insertprev.
 *   3. The old node and the new leaf become the two children of the new
 *      branch node. The old node comes first when the tail is exhausted
 *      (tailptr == sentinel) or when the character after headend is smaller
 *      than the character at tailptr; otherwise the new leaf comes first.
 *      The new branch node inherits the old node's former brother.
 *   4. The depth of the new node is headnodedepth plus the length of
 *      [headstart, headend]; it is recorded together with nextfreeleafnum,
 *      and the leaf counter and leaf pointer are advanced.
 */
static void insertbranchnode(Suffixtree *stree) {
    Uint *ptr, *insertnodeptr, *insertleafptr, insertnodeptrbrother;

    spaceforbranchtab(stree);
    if(stree->headnodedepth == 0) {    // head is the root
        // root edges are looked up by first character of the edge label
        stree->rootchildren[(Uint) *(stree->headstart)]
            = MAKEBRANCHADDR(stree->nextfreebranchnum);
        *(stree->nextfreebranch+1) = VALIDINIT;
    } else {
        if(stree->insertprev == 0) { // new branch = first child
            SETCHILD(stree->headnode,MAKEBRANCHADDR(stree->nextfreebranchnum));
        } else {
            if(ISLEAF(stree->insertprev)) { // new branch = right brother of leaf
                ptr = stree->leaftab + GETLEAFINDEX(stree->insertprev);
                SETLEAFBROTHER(ptr,MAKEBRANCHADDR(stree->nextfreebranchnum));
            } else {                   // new branch = brother of branching node
                SETBROTHER(stree->branchtab + GETBRANCHINDEX(stree->insertprev),
                           MAKEBRANCHADDR(stree->nextfreebranchnum));
            }
        }
    }
    if(ISLEAF(stree->insertnode)) { // split edge is leaf edge
        insertleafptr = stree->leaftab + GETLEAFINDEX(stree->insertnode);
        if (stree->tailptr == stree->sentinel ||
                *(stree->headend+1) < *(stree->tailptr)) {
            SETNEWCHILDBROTHER(MAKELARGE(stree->insertnode),  // first child = old leaf
                               LEAFBROTHERVAL(*insertleafptr));  // inherit brother
            RECALLNEWLEAFADDRESS(stree->nextfreeleafptr);
            SETLEAFBROTHER(insertleafptr,                     // new leaf =
                           MAKELEAF(stree->nextfreeleafnum)); // right brother of old leaf
        } else {
            SETNEWCHILDBROTHER(MAKELARGELEAF(stree->nextfreeleafnum),  // first child = new leaf
                               LEAFBROTHERVAL(*insertleafptr));  // inherit brother
            *(stree->nextfreeleafptr) = stree->insertnode;  // old leaf = right brother of new leaf
            RECALLLEAFADDRESS(insertleafptr);
        }
    } else { // split edge leads to branching node
        insertnodeptr = stree->branchtab + GETBRANCHINDEX(stree->insertnode);
        insertnodeptrbrother = GETBROTHER(insertnodeptr);
        if (stree->tailptr == stree->sentinel ||
                *(stree->headend+1) < *(stree->tailptr)) {
            SETNEWCHILDBROTHER(MAKELARGE(stree->insertnode), // first child = old branch node
                               insertnodeptrbrother);        // inherit right brother
            RECALLNEWLEAFADDRESS(stree->nextfreeleafptr);
            SETBROTHER(insertnodeptr,MAKELEAF(stree->nextfreeleafnum)); // new leaf = brother of old branch
        } else {
            SETNEWCHILDBROTHER(MAKELARGELEAF(stree->nextfreeleafnum), // first child is new leaf
                               insertnodeptrbrother);        // inherit brother
            *(stree->nextfreeleafptr) = stree->insertnode;   // old branch = right brother of new leaf
            RECALLBRANCHADDRESS(insertnodeptr);
        }
    }
    SETNILBIT;
    RECALLSUCC(MAKEBRANCHADDR(stree->nextfreebranchnum)); // node on succ. path
    // depth of the new node = depth of head node + length of the matched edge part
    stree->currentdepth = stree->headnodedepth + (Uint) (stree->headend-stree->headstart+1);
    SETDEPTHEADPOS(stree->currentdepth,stree->nextfreeleafnum);
    SETMAXBRANCHDEPTH(stree->currentdepth);
    stree->nextfreeleafnum++;
    stree->nextfreeleafptr++;
}
Beispiel #3
0
/*
 * Append to ll one location for every defined entry of stree->rootchildren,
 * scanning the character codes 0..UCHAR_MAX in increasing order.
 *
 * For a leaf entry the location records the leaf index as text position and
 * textlen - leafindex as the remaining length. For a branch entry it records
 * the head position and depth - 1, both as delivered by GETBOTH.
 *
 * ll->nextfreeSimpleloc is advanced but not reset here, so the new locations
 * are added after whatever the array already holds.
 */
void rootsucclocationsstree(Suffixtree *stree,ArraySimpleloc *ll)
{
  /* distance and largeptr are not referenced directly below; presumably the
     GETBOTH macro expects them in scope -- confirm against its definition */
  Uint headpos, leafindex, depth, distance, node, ch, *largeptr, *nodeptr;
  Simpleloc *llptr;

  CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
  for(ch = 0; ch <= UCHAR_MAX; ch++)
  {
    node = stree->rootchildren[ch];
    if(node == UNDEFINEDREFERENCE)
    {
      continue;   /* no edge starts with this character */
    }
    llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc];
    ll->nextfreeSimpleloc++;
    if(ISLEAF(node))
    {
      leafindex = GETLEAFINDEX(node);
      llptr->nextnode.address = stree->leaftab + leafindex;
      llptr->nextnode.toleaf = true;
      llptr->remain = stree->textlen - leafindex;
      llptr->textpos = leafindex;
    } else
    {
      nodeptr = stree->branchtab + GETBRANCHINDEX(node);
      GETBOTH(depth,headpos,nodeptr);
      llptr->nextnode.address = nodeptr;
      llptr->nextnode.toleaf = false;
      llptr->remain = depth - 1;
      llptr->textpos = headpos;
    }
    CHECKADDR(stree,llptr->nextnode);
  }
}
Beispiel #4
0
/*
 * Add the leaf numbered stree->nextfreeleafnum below the current head node.
 *
 * When the head node has depth 0 the leaf is entered into
 * stree->rootchildren under the character at tailptr (nothing is entered
 * when tailptr has reached the sentinel). Otherwise the leaf is spliced into
 * the sibling list: as the new first child when stree->insertprev is 0, or
 * directly after stree->insertprev. In both list cases the new leaf takes
 * over the reference that previously stood in that position.
 * Finally the leaf counter and the leaf pointer are advanced.
 */
static void insertleaf(Suffixtree *stree) {
    Uint *prevptr, leafref;

    leafref = MAKELEAF(stree->nextfreeleafnum);
    if(stree->headnodedepth != 0) {
        if(stree->insertprev == 0) {
            // new leaf goes to the front; old first child becomes its brother
            *(stree->nextfreeleafptr) = GETCHILD(stree->headnode);
            SETCHILD(stree->headnode,leafref);
        } else if(ISLEAF(stree->insertprev)) {
            // predecessor in the sibling list is a leaf
            prevptr = stree->leaftab + GETLEAFINDEX(stree->insertprev);
            *(stree->nextfreeleafptr) = LEAFBROTHERVAL(*prevptr);
            SETLEAFBROTHER(prevptr,leafref);
        } else {
            // predecessor in the sibling list is a branching node
            prevptr = stree->branchtab + GETBRANCHINDEX(stree->insertprev);
            *(stree->nextfreeleafptr) = GETBROTHER(prevptr);
            SETBROTHER(prevptr,leafref);
        }
    } else if(stree->tailptr != stree->sentinel) {
        // head is the root and the tail is not empty: register the root edge
        stree->rootchildren[(Uint) *(stree->tailptr)] = leafref;
        *(stree->nextfreeleafptr) = VALIDINIT;
    }
    RECALLSUCC(leafref);     // recall node on successor path of headnode
    stree->nextfreeleafnum++;
    stree->nextfreeleafptr++;
}
Beispiel #5
0
/*
 * Store in ll the locations reachable from loc by one more character.
 * ll is emptied first (nextfreeSimpleloc = 0).
 *
 * If loc lies inside an edge (loc->remain > 0) there is a single follow-up
 * location one position further along the same edge. With nosentinel set,
 * nothing is stored when that edge is a leaf edge with at most one character
 * remaining.
 *
 * Otherwise loc->nextnode.address is treated as a branching node and one
 * location is stored per entry of its sibling list. With nosentinel set,
 * leaf entries whose remaining length is 0 are skipped.
 *
 * NOTE(review): the trace line written to stderr is emitted on every call;
 * confirm whether it is intentional or a leftover debug statement.
 */
void succlocationsstree(Suffixtree *stree,bool nosentinel,Simpleloc *loc,
                        ArraySimpleloc *ll)
{
  /* distance and largeptr are not referenced directly below; presumably the
     GETONLYDEPTH/GETBOTH macros expect them in scope -- confirm */
  Uint succdepth, succ, leafindex, distance, depth, headpos, 
       remain, edgestart, *succptr, *largeptr, *nodeptr;
  Simpleloc *llptr;

  fprintf(stderr,"succlocationsstree\n");
  ll->nextfreeSimpleloc = 0;
  CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);

  if(loc->remain > 0)
  {
    /* still inside an edge: step one character forward unless only the
       sentinel of a leaf edge is left and sentinels are excluded */
    if(!nosentinel || !loc->nextnode.toleaf || loc->remain > UintConst(1))
    {
      llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc];
      ll->nextfreeSimpleloc++;
      llptr->nextnode.toleaf = loc->nextnode.toleaf;
      llptr->nextnode.address = loc->nextnode.address;
      llptr->remain = loc->remain - 1;
      llptr->textpos = loc->textpos + 1;
      CHECKADDR(stree,llptr->nextnode);
    }
    return;
  }

  /* loc is at a node: walk its list of successors */
  nodeptr = loc->nextnode.address;
  GETONLYDEPTH(depth,nodeptr);
  succ = GETCHILD(nodeptr);
  for(;;)
  {
    if(ISLEAF(succ))
    {
      leafindex = GETLEAFINDEX(succ);
      edgestart = depth + leafindex;
      remain = stree->textlen - edgestart;
      if(!nosentinel || remain >= UintConst(1))
      {
        llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc];
        ll->nextfreeSimpleloc++;
        llptr->textpos = edgestart;
        llptr->remain = remain;
        llptr->nextnode.toleaf = true;
        llptr->nextnode.address = stree->leaftab + leafindex;
        CHECKADDR(stree,llptr->nextnode);
      }
      succ = LEAFBROTHERVAL(stree->leaftab[leafindex]);
    } else
    {
      succptr = stree->branchtab + GETBRANCHINDEX(succ);
      GETBOTH(succdepth,headpos,succptr);  /* depth and head position of successor */
      llptr = &ll->spaceSimpleloc[ll->nextfreeSimpleloc];
      ll->nextfreeSimpleloc++;
      llptr->nextnode.address = succptr;
      llptr->nextnode.toleaf = false;
      llptr->remain = succdepth - depth - 1;
      llptr->textpos = depth + headpos;
      CHECKADDR(stree,llptr->nextnode);
      succ = GETBROTHER(succptr);
    }
    if(NILPTR(succ))
    {
      break;   /* end of sibling list */
    }
  }
}
Beispiel #6
0
/*
 * Convert the integer node reference i into the Reference *ref:
 * ref->toleaf tells whether i denotes a leaf, and ref->address points to the
 * corresponding entry of stree->leaftab or stree->branchtab.
 */
static void int2ref(Suffixtree *stree,Reference *ref,Uint i)
{
  if(ISLEAF(i))
  {
    ref->address = stree->leaftab + GETLEAFINDEX(i);
    ref->toleaf = true;
    return;
  }
  ref->address = stree->branchtab + GETBRANCHINDEX(i);
  ref->toleaf = false;
}
Beispiel #7
0
/*
 * Starting at the current head node, follow the characters at stree->tailptr
 * down the tree as far as they match, advancing tailptr by the matched
 * length.
 *
 * Results are left in the stree fields:
 *   - headend == NULL: the scan stopped exactly at stree->headnode (no edge
 *     starts with the next tail character, or the tail is exhausted). Below
 *     a non-root head node, insertprev is the sibling after which a new edge
 *     belongs (0 means: before the first child).
 *   - headend != NULL: the scan stopped inside the edge leading to
 *     stree->insertnode; [headstart, headend] is the matched part of that
 *     edge label. Below a non-root head node, insertprev is the left sibling
 *     of insertnode (0 if none).
 * In the root cases insertprev is not written; root edges are found through
 * stree->rootchildren instead of a sibling list.
 *
 * The sibling search stops at the first edge whose first character is >=
 * the tail character, i.e. it relies on the siblings being ordered by first
 * character.
 *
 * taillcp() is defined elsewhere; presumably it returns how many characters
 * of the tail (after the first) match the given text range -- confirm.
 * largeptr and distance are not referenced directly in this function;
 * presumably the GET... macros expect them in scope -- confirm.
 */
static void scanprefix(Suffixtree *stree) {
    Uint *nodeptr = NULL, *largeptr = NULL, leafindex, nodedepth, edgelen, node,
          distance = 0, prevnode, prefixlen, headposition;
    SYMBOL *leftborder = (SYMBOL *) NULL, tailchar, edgechar = 0;

    if(stree->headnodedepth == 0) { // headnode is root
        if(stree->tailptr == stree->sentinel) { // tail is empty: nothing to scan
            stree->headend = NULL;
            return;
        }
        tailchar = *(stree->tailptr);
        if((node = stree->rootchildren[(Uint) tailchar]) == 0) { // no root edge for tailchar
            stree->headend = NULL;
            return;
        }
        if(ISLEAF(node)) { // successor edge is leaf, compare tail and leaf edge label
            leftborder = stree->text + GETLEAFINDEX(node);
            prefixlen = 1 + taillcp(stree,leftborder+1,stree->sentinel-1);
            (stree->tailptr) += prefixlen;
            stree->headstart = leftborder;
            stree->headend = leftborder + (prefixlen-1);
            stree->insertnode = node;
            return;
        }
        nodeptr = stree->branchtab + GETBRANCHINDEX(node);
        GETBOTH(nodedepth,headposition,nodeptr);  // get info for branch node
        leftborder = stree->text + headposition;
        prefixlen = 1 + taillcp(stree,leftborder+1,leftborder + nodedepth - 1);
        (stree->tailptr)+= prefixlen;
        if(nodedepth > prefixlen) { // cannot reach the successor, fall out of tree
            stree->headstart = leftborder;
            stree->headend = leftborder + (prefixlen-1);
            stree->insertnode = node;
            return;
        }
        // whole edge matched: continue the scan from the successor node
        stree->headnode = nodeptr;
        stree->headnodedepth = nodedepth;
    }
    while(True) { // headnode is not the root
        prevnode = 0;
        node = GETCHILD(stree->headnode);
        if(stree->tailptr == stree->sentinel) { // tail is exhausted: a $-edge is needed
            do { // there is no $-edge, so find last successor, of which it becomes right brother
                prevnode = node;
                if(ISLEAF(node)) {
                    node = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(node)]);
                } else {
                    node = GETBROTHER(stree->branchtab + GETBRANCHINDEX(node));
                }
            } while(!NILPTR(node));
            stree->insertnode = NILBIT;
            stree->insertprev = prevnode;
            stree->headend = NULL;
            return;
        }
        tailchar = *(stree->tailptr);

        do { // find successor edge with firstchar = tailchar
            if(ISLEAF(node)) { // successor is leaf
                leafindex = GETLEAFINDEX(node);
                leftborder = stree->text + (stree->headnodedepth + leafindex);
                if((edgechar = *leftborder) >= tailchar) { // edge will not come later
                    break;
                }
                prevnode = node;
                node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
            } else { // successor is branch node
                nodeptr = stree->branchtab + GETBRANCHINDEX(node);
                GETONLYHEADPOS(headposition,nodeptr);
                leftborder = stree->text + (stree->headnodedepth + headposition);
                if((edgechar = *leftborder) >= tailchar) { // edge will not come later
                    break;
                }
                prevnode = node;
                node = GETBROTHER(nodeptr);
            }
        } while(!NILPTR(node));
        if(NILPTR(node) || edgechar > tailchar) { // edge not found
            stree->insertprev = prevnode;   // new edge will become brother of this
            stree->headend = NULL;
            return;
        }
        if(ISLEAF(node)) { // correct edge is leaf edge, compare its label with tail
            prefixlen = 1 + taillcp(stree,leftborder+1,stree->sentinel-1);
            (stree->tailptr) += prefixlen;
            stree->headstart = leftborder;
            stree->headend = leftborder + (prefixlen-1);
            stree->insertnode = node;
            stree->insertprev = prevnode;
            return;
        }
        GETDEPTHAFTERHEADPOS(nodedepth,nodeptr); // we already know headposition
        edgelen = nodedepth - stree->headnodedepth;
        prefixlen = 1 + taillcp(stree,leftborder+1,leftborder + edgelen - 1);
        (stree->tailptr) += prefixlen;
        if(edgelen > prefixlen) { // cannot reach next node
            stree->headstart = leftborder;
            stree->headend = leftborder + (prefixlen-1);
            stree->insertnode = node;
            stree->insertprev = prevnode;
            return;
        }
        // whole edge matched: descend and keep scanning
        stree->headnode = nodeptr;
        stree->headnodedepth = nodedepth;
    }
}
Beispiel #8
0
/*
 * Walk down from the current head node along the string
 * [stree->headstart, stree->headend], comparing only the first character of
 * each edge and then skipping the edge by its length.
 *
 * Results are left in the stree fields:
 *   - headend == NULL: the string ends exactly at a node, which is now
 *     stree->headnode (with its depth in stree->headnodedepth).
 *   - otherwise: the string ends inside the edge leading to
 *     stree->insertnode; headstart has been advanced past the edges that
 *     were skipped completely. Below a non-root head node, insertprev is the
 *     left sibling of insertnode (0 if it is the first child).
 *
 * NOTE(review): the sibling traversal has no NILPTR test and the root lookup
 * does not check for an empty table entry, so the function relies on the
 * string already being present in the tree and on headstart..headend being
 * non-empty.
 * largeptr and distance are not referenced directly in this function;
 * presumably the GET... macros expect them in scope -- confirm.
 */
static void rescan (Suffixtree *stree) {
    Uint *nodeptr, *largeptr = NULL, distance = 0, node, prevnode,
                    nodedepth, edgelen, wlen, leafindex, headposition;
    SYMBOL headchar, edgechar;

    if(stree->headnodedepth == 0) { // head is the root
        headchar = *(stree->headstart);  // headstart is assumed to be not empty
        node = stree->rootchildren[(Uint) headchar];
        if(ISLEAF(node)) { // stop if successor is leaf
            stree->insertnode = node;
            return;
        }
        nodeptr = stree->branchtab + GETBRANCHINDEX(node);
        GETONLYDEPTH(nodedepth,nodeptr);
        wlen = (Uint) (stree->headend - stree->headstart + 1);
        if(nodedepth > wlen) {  // cannot reach the successor node
            stree->insertnode = node;
            return;
        }
        stree->headnode = nodeptr;        // go to successor node
        stree->headnodedepth = nodedepth;
        if(nodedepth == wlen) {           // location has been scanned
            stree->headend = NULL;
            return;
        }
        (stree->headstart) += nodedepth;  // skip the edge just traversed
    }
    while(True) { // headnode is not the root
        headchar = *(stree->headstart);  // headstart is assumed to be nonempty
        prevnode = 0;
        node = GETCHILD(stree->headnode);
        while(True) {           // traverse the list of successors
            if(ISLEAF(node)) { // successor is leaf
                leafindex = GETLEAFINDEX(node);
                edgechar = stree->text[stree->headnodedepth + leafindex];
                if(edgechar == headchar) {  // correct edge found
                    stree->insertnode = node;
                    stree->insertprev = prevnode;
                    return;
                }
                prevnode = node;
                node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
            } else { // successor is branch node
                nodeptr = stree->branchtab + GETBRANCHINDEX(node);
                GETONLYHEADPOS(headposition,nodeptr);
                edgechar = stree->text[stree->headnodedepth + headposition];
                if(edgechar == headchar) { // correct edge found
                    break;
                }
                prevnode = node;
                node = GETBROTHER(nodeptr);
            }
        }

        GETDEPTHAFTERHEADPOS(nodedepth,nodeptr);     // get info about successor node
        edgelen = nodedepth - stree->headnodedepth;
        wlen = (Uint) (stree->headend - stree->headstart + 1);
        if(edgelen > wlen) {   // cannot reach the successor node
            stree->insertnode = node;
            stree->insertprev = prevnode;
            return;
        }
        stree->headnode = nodeptr;    // go to the successor node
        stree->headnodedepth = nodedepth;
        if(edgelen == wlen) {                  // location is found
            stree->headend = NULL;
            return;
        }
        (stree->headstart) += edgelen;         // skip the edge just traversed
    }
}