예제 #1
0
void ST_PrintNode(SUFFIX_TREE* tree, NODE* node1, long depth)
{
   NODE* node2 = node1->sons;
   long  d = depth , start = node1->edge_label_start , end;
   end     = get_node_label_end(tree, node1);

   if(depth>0)
   {
      /* Print the branches coming from higher nodes */
      while(d>1)
      {
         printf("|");
         d--;
      }
      printf("+");
      /* Print the node itself */
      while(start<=end)
      {
         printf("%c",tree->tree_string[start]);
         start++;
      }
      #ifdef DEBUG
         printf("  \t\t\t(%lu,%lu | %lu)",node1->edge_label_start,end,node1->path_position);
      #endif
      printf("\n");
   }
   /* Recoursive call for all node1's sons */
   while(node2!=0)
   {
      ST_PrintNode(tree,node2, depth+1);
      node2 = node2->right_sibling;
   }
}
예제 #2
0
void SuffixTree_print_node(SuffixTree_T tree, Node_T node1, long depth)
{
   Node_T node2 = node1->left_son;
   long  d = depth , start = node1->edge_label_start , end;
   end     = get_node_label_end(tree, node1);
   long orig_start = start;

   if(depth>0)
   {
      /* Print the branches coming from higher nodes */
      while(d>1)
      {
         printf("|");
         d--;
      }
      printf("+");
      /* Print the node itself */
      while(start<=end)
      {
         printf("%c",tree->tree_string[start]);
         start++;
      }

      printf("\t%zu\t%ld\t%ld\t%ld\t%ld", node1->index, orig_start,
             end, node1->path_position, node1->edge_depth);
      printf("\n");
   }
   /* Recoursive call for all node1's sons */
   while(node2!=0)
   {
      SuffixTree_print_node(tree,node2, depth+1);
      node2 = node2->right_sibling;
   }
}
예제 #3
0
DBL_WORD ST_FindSubstring(
                      /* The suffix array */
                      SUFFIX_TREE*    tree,      
                      /* The substring to find */
                      char*  W,         
                      /* The length of W */
                      DBL_WORD        P)         
{
   /* Starts with the root's son that has the first character of W as its
      incoming edge first character */
   NODE* node   = find_son(tree, tree->root, W[0]);
   DBL_WORD k,j = 0, node_label_end;

   /* Scan nodes down from the root untill a leaf is reached or the substring is
      found */
   while(node!=0)
   {
      k=node->edge_label_start;
      node_label_end = get_node_label_end(tree,node);
      
      /* Scan a single edge - compare each character with the searched string */
      while(j<P && k<=node_label_end && tree->tree_string[k] == W[j])
      {
         j++;
         k++;

#ifdef STATISTICS
         counter++;
#endif
      }
      
      /* Checking which of the stopping conditions are true */
      if(j == P)
      {
         /* W was found - it is a substring. Return its path starting index */
         return node->path_position;
      }
      else if(k > node_label_end)
         /* Current edge is found to match, continue to next edge */
         node = find_son(tree, node, W[j]);
      else
      {
         /* One non-matching symbols is found - W is not a substring */
         return ST_ERROR;
      }
   }
   return ST_ERROR;
}
예제 #4
0
void ST_PrintFullNode(SUFFIX_TREE* tree, NODE* node)
{
   long start, end;
   if(node==NULL)
      return;
   /* Calculating the begining and ending of the last edge */
   start   = node->edge_label_start;
   end     = get_node_label_end(tree, node);
   
   /* Stoping condition - the root */
   if(node->father!=tree->root)
      ST_PrintFullNode(tree,node->father);
   /* Print the last edge */
   while(start<=end)
   {
      printf("%c",tree->tree_string[start]);
      start++;
   }
}
예제 #5
0
SuffixTreeIndex_T SuffixTree_find_substring(const SuffixTree_T tree,
                                            char* query,
                                            SuffixTreeIndex_T query_length)
{
   /* Starts with the root's son that has the first character of W as its
      incoming edge first character */
   Node_T node   = find_son(tree, tree->root, query[0]);
   SuffixTreeIndex_T k,j = 0, node_label_end;

   /* Scan nodes down from the root untill a leaf is reached or the substring is
      found */
   while(node != NULL)
   {
      k=node->edge_label_start;
      node_label_end = get_node_label_end(tree,node);
      
      /* Scan a single edge - compare each character with the searched string */
      while(j<query_length && k<=node_label_end && tree->tree_string[k] == query[j])
      {
         j++;
         k++;
      }
      
      /* Checking which of the stopping conditions are true */
      if(j == query_length)
      {
         /* W was found - it is a substring. Return its path starting index */
         return node->path_position;
      }
      else if(k > node_label_end)
         /* Current edge is found to match, continue to next edge */
         node = find_son(tree, node, query[j]);
      else
      {
         /* One non-matching symbols is found - W is not a substring */
         return (SuffixTreeIndex_T)-1;
      }
   }
   return (SuffixTreeIndex_T)-1;
}
예제 #6
0
void SEA(
                      SUFFIX_TREE*   tree, 
                      POS*           pos,
                      PATH           str, 
                      DBL_WORD*      rule_applied,
                      char           after_rule_3)
{
   DBL_WORD   chars_found = 0 , path_pos = str.begin;
   NODE*      tmp;
 
#ifdef DEBUG   
   ST_PrintTree(tree);
   printf("extension: %lu  phase+1: %lu",str.begin, str.end);
   if(after_rule_3 == 0)
      printf("   followed from (%lu,%lu | %lu) ", pos->node->edge_label_start, get_node_label_end(tree,pos->node), pos->edge_pos);
   else
      printf("   starting at (%lu,%lu | %lu) ", pos->node->edge_label_start, get_node_label_end(tree,pos->node), pos->edge_pos);
#endif

#ifdef STATISTICS
   counter++;
#endif

   /* Follow suffix link only if it's not the first extension after rule 3 was applied */
   if(after_rule_3 == 0)
      follow_suffix_link(tree, pos);

#ifdef DEBUG   
#ifdef STATISTICS
   if(after_rule_3 == 0)
      printf("to (%lu,%lu | %lu). counter: %lu\n", pos->node->edge_label_start, get_node_label_end(tree,pos->node),pos->edge_pos,counter);
   else
      printf(". counter: %lu\n", counter);
#endif
#endif

   /* If node is root - trace whole string starting from the root, else - trace last character only */
   if(pos->node == tree->root)
   {
      pos->node = trace_string(tree, tree->root, str, &(pos->edge_pos), &chars_found, no_skip);
   }
   else
   {
      str.begin = str.end;
      chars_found = 0;

      /* Consider 2 cases:
         1. last character matched is the last of its edge */
      if(is_last_char_in_edge(tree,pos->node,pos->edge_pos))
      {
         /* Trace only last symbol of str, search in the  NEXT edge (node) */
         tmp = find_son(tree, pos->node, tree->tree_string[str.end]);
         if(tmp != 0)
         {
            pos->node      = tmp;
            pos->edge_pos   = 0;
            chars_found      = 1;
         }
      }
      /* 2. last character matched is NOT the last of its edge */
      else
      {
         /* Trace only last symbol of str, search in the CURRENT edge (node) */
         if(tree->tree_string[pos->node->edge_label_start+pos->edge_pos+1] == tree->tree_string[str.end])
         {
            pos->edge_pos++;
            chars_found   = 1;
         }
      }
   }

   /* If whole string was found - rule 3 applies */
   if(chars_found == str.end - str.begin + 1)
   {
      *rule_applied = 3;
      /* If there is an internal node that has no suffix link yet (only one may 
         exist) - create a suffix link from it to the father-node of the 
         current position in the tree (pos) */
      if(suffixless != 0)
      {
         create_suffix_link(suffixless, pos->node->father);
         /* Marks that no internal node with no suffix link exists */
         suffixless = 0;
      }

      #ifdef DEBUG   
         printf("rule 3 (%lu,%lu)\n",str.begin,str.end);
      #endif
      return;
   }
   
   /* If last char found is the last char of an edge - add a character at the 
      next edge */
   if(is_last_char_in_edge(tree,pos->node,pos->edge_pos) || pos->node == tree->root)
   {
      /* Decide whether to apply rule 2 (new_son) or rule 1 */
      if(pos->node->sons != 0)
      {
         /* Apply extension rule 2 new son - a new leaf is created and returned 
            by apply_extension_rule_2 */
         apply_extension_rule_2(pos->node, str.begin+chars_found, str.end, path_pos, 0, new_son);
         *rule_applied = 2;
         /* If there is an internal node that has no suffix link yet (only one 
            may exist) - create a suffix link from it to the father-node of the 
            current position in the tree (pos) */
         if(suffixless != 0)
         {
            create_suffix_link(suffixless, pos->node);
            /* Marks that no internal node with no suffix link exists */
            suffixless = 0;
         }
      }
   }
   else
   {
      /* Apply extension rule 2 split - a new node is created and returned by 
         apply_extension_rule_2 */
      tmp = apply_extension_rule_2(pos->node, str.begin+chars_found, str.end, path_pos, pos->edge_pos, split);
      if(suffixless != 0)
         create_suffix_link(suffixless, tmp);
      /* Link root's sons with a single character to the root */
      if(get_node_label_length(tree,tmp) == 1 && tmp->father == tree->root)
      {
         tmp->suffix_link = tree->root;
         /* Marks that no internal node with no suffix link exists */
         suffixless = 0;
      }
      else
         /* Mark tmp as waiting for a link */
         suffixless = tmp;
      
      /* Prepare pos for the next extension */
      pos->node = tmp;
      *rule_applied = 2;
   }
}
예제 #7
0
DBL_WORD get_node_label_length(SUFFIX_TREE* tree, NODE* node)
{
   /* Calculate and return the lentgh of the node */
   return get_node_label_end(tree, node) - node->edge_label_start + 1;
}
예제 #8
0
int Node_is_leaf(Node_T node, SuffixTree_T tree) {
  return get_node_label_end(tree, node) == tree->e;
}
예제 #9
0
SuffixTreeIndex_T Node_get_incoming_edge_length(Node_T node, SuffixTree_T tree)
{
  return get_node_label_end(tree, node) - node->edge_label_start + 1;
}
예제 #10
0
SuffixTreeIndex_T get_node_label_length(SuffixTree_T tree, Node_T node)
{
   /* Calculate and return the lentgh of the node */
   return get_node_label_end(tree, node) - node->edge_label_start + 1;
}