//! Walks the CST with its DFS iterator and sums the depth of every inner node it meets.
/*!
 * \param cst    Suffix tree that is traversed.
 * \param times  Number of iterator steps; capped at 2*cst.nodes()-cst.size().
 * \param output If true, every inner node is additionally written to std::cerr
 *               as "depth-[lb,rb] " followed by its edge label (only when the
 *               depth is below 60).
 *
 * The traversal is enclosed by two write_R_output calls ("begin" with a zero
 * counter, "end" with the accumulated depth sum).
 */
void test_cst_dfs_iterator_and_depth(Cst& cst, typename Cst::size_type times=1000000, bool output=false)
{
    if (times > 2*cst.nodes()-cst.size())
        times = 2*cst.nodes()-cst.size();
    typedef typename Cst::size_type size_type;
    size_type cnt=0;
    write_R_output("cst","dfs and depth","begin",times,cnt);
    typename Cst::const_iterator it = cst.begin();
    if (!output) {
        for (size_type i=0; i<times; ++i, ++it) {
            if (!cst.is_leaf(*it))
                cnt += cst.depth(*it);
        }
    } else {
        for (size_type i=0; i<times; ++i, ++it) {
            if (!cst.is_leaf(*it)) {
                size_type d = cst.depth(*it);
                std::cerr << d << "-[" << cst.lb(*it) << "," << cst.rb(*it) << "] ";
                if (d < 60) {
                    // The label index is a size_type with its own name: it no
                    // longer shadows the step counter `i` and is compared with
                    // `d` without a signed/unsigned mismatch.
                    for (size_type k=1; k<=d; ++k)
                        std::cerr<< cst.edge(*it, k);
                }
                std::cerr << std::endl;
                cnt += d;
            }
        }
    }
    write_R_output("cst","dfs and depth","end",times,cnt);
}
void test_cst_child_operation(const Cst& cst, typename Cst::size_type times=5000, uint64_t x=17) { typedef typename Cst::size_type size_type; typedef typename Cst::node_type node_type; std::vector<node_type> nodes; generate_nodes_from_random_leaves(cst, times, nodes, x); // for(size_type i=0; i<20; ++i){ // std::cout<< cst.lb(nodes[i])<<" "<<cst.rb(nodes[i])<<std::endl; // } // choose some chars for the text unsigned char* letters = new unsigned char[nodes.size()+1]; for (size_type i=0; i<nodes.size(); ++i) { letters[i] = cst.csa.bwt[i]; } node_type c; // for child node size_type char_pos=0; size_type cnt=0; write_R_output("cst","child","begin",nodes.size(),cnt); for (size_type i=0; i<nodes.size(); ++i) { // if(i<20){ // std::cout<<"i="<<i<<" vl="<<cst.lb(nodes[i])<<" rb="<<cst.rb(nodes[i])<<std::endl; // std::cout<<cst.csa[cst.lb(nodes[i])]<<" "<<cst.depth(nodes[i])<<std::endl; // } c = cst.child(nodes[i], letters[i], char_pos); if (c==cst.root()) ++cnt; } write_R_output("cst","child","end",nodes.size(),cnt); delete [] letters; }
void test_cst_parent_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17) { typedef typename Cst::size_type size_type; typedef typename Cst::node_type node_type; srand(x); size_type n = cst.csa.size(); // take \f$ time \f$ random leaves std::vector<node_type> rand_leaf(times); for (size_type i=0; i<rand_leaf.size(); ++i) { rand_leaf[i] = cst.select_leaf(1+ (rand() % n)); } node_type p; size_type cnt=0; write_R_output("cst","parent","begin",times,cnt); for (size_type i=0; i<times; ++i, ++cnt) { p = cst.parent(rand_leaf[i]); while (p != cst.root()) { p = cst.parent(p); ++cnt; } } write_R_output("cst","parent","end",times,cnt); }
void test_cst_matching_statistics(const Cst& cst, unsigned char* S2, typename Cst::size_type n2) { typedef typename Cst::size_type size_type; typedef typename Cst::node_type node_type; size_type cnt = 0; write_R_output("cst","mstats","begin",n2,cnt); size_type q = 0; // current match length size_type p2 = n2-1; // position in S2 size_type i = 0, j = cst.csa.size()-1; // \f$ \epsilon \f$ matches all suffixes of S1 while (p2+1 > 0) { size_type lb, rb; // perform backward search on interval \f$ [i,j] \f$ size_type size = algorithm::backward_search(cst.csa, i, j, S2[p2], lb, rb); if (size > 0) { q = q + 1; i = lb; j = rb; p2 = p2 - 1; } else if (i==0 and j == cst.csa.size()) { p2 = p2 -1; } else { // map interval to a node of the cst and calculate parent node_type p = cst.parent(cst.node(i, j)); q = cst.depth(p); // update match length i = cst.lb(p); // update left bound j = cst.rb(p); // update right bound } cnt += q; } write_R_output("cst","mstats","end",n2,cnt); }
//! Prefix increment of the iterator. iterator& operator++() { if (!m_valid) return *this; if (m_v == m_cst->root()) { m_valid = false; return *this; } value_type w = m_cst->sibling(m_v); if (w == m_cst->root()) { // if no next right sibling exist m_v = m_cst->parent(m_v); // go to parent } else { // if next right sibling exist m_v = m_cst->leftmost_leaf(w); // go to leaftmost leaf in the subtree of w } return *this; }
//! Takes one level off the stack and returns the parent of the current node.
/*!
 * The stack size is decreased unconditionally. If a cache array exists and the
 * new stack size indexes into it, the cached node is returned; otherwise the
 * parent is computed with m_cst->parent(m_v).
 */
inline node_type parent()
{
    --m_stack_size; // decrease stack size
    const bool cached = (m_stack_cache != nullptr) && (m_stack_size < cache_size);
    if (!cached) {
        return m_cst->parent(m_v);
    }
    return m_stack_cache[m_stack_size];
}
//! Appends the root paths of randomly chosen leaves to a node list.
/*!
 * \param cst   Suffix tree the nodes are taken from.
 * \param times Number of random leaves to draw.
 * \param nodes Output list; for every drawn leaf the leaf itself and then each
 *              ancestor up to and including the root are appended.
 * \param x     Seed passed to srand.
 *
 * Nothing is appended when cst.csa.size() is 0.
 */
void generate_nodes_from_random_leaves(const Cst& cst, typename Cst::size_type times, std::vector<typename Cst::node_type>& nodes, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;
    srand(x);
    size_type n = cst.csa.size();
    // `rand() % n` below would divide by zero for an empty index
    if (n == 0)
        return;
    // generate nodes
    for (size_type i=0; i<times; ++i) {
        node_type p = cst.select_leaf(1+ (rand() % n));
        nodes.push_back(p);
        while (p != cst.root()) {
            p = cst.parent(p);
            nodes.push_back(p);
        }
    }
}
//! Prefix increment of the iterator. iterator& operator++() { if (!m_valid) return *this; if (m_queue.empty()) { m_valid = false; return *this; } value_type v = m_queue.front(); m_queue.pop(); value_type child = m_cst->select_child(v, 1); while (m_cst->root() != child) { m_queue.push(child); child = m_cst->sibling(child); } return *this; }
//! Records the current node on the stack and returns its first child.
/*!
 * The current node is stored in the cache array only if that array exists and
 * the stack size still indexes into it. The stack size itself grows by one in
 * every case.
 */
inline node_type first_child()
{
    const bool fits_in_cache = (m_stack_cache != nullptr) && (m_stack_size < cache_size);
    if (fits_in_cache) {
        // push node to the stack
        m_stack_cache[m_stack_size] = m_v;
    }
    ++m_stack_size; // counted even when the node was not cached
    return m_cst->select_child(m_v, 1);
}
void test_cst_1th_child_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17) { typedef typename Cst::size_type size_type; typedef typename Cst::node_type node_type; std::vector<node_type> nodes; generate_nodes_from_random_leaves(cst, times, nodes, x); node_type c; // for 1th_child node size_type cnt=0; write_R_output("cst","1th_child","begin",nodes.size(),cnt); for (size_type i=0; i<nodes.size(); ++i) { c = cst.select_child(nodes[i], 1); if (c==cst.root()) ++cnt; } write_R_output("cst","1th_child","end",nodes.size(),cnt); }
//! Benchmarks Cst::depth restricted to inner nodes.
/*!
 * \param cst   Suffix tree under test.
 * \param times Number of random leaves handed to generate_nodes_from_random_leaves.
 * \param x     Seed handed to generate_nodes_from_random_leaves.
 *
 * The generated nodes are filtered with cst.is_leaf first; only the non-leaf
 * nodes take part in the measured loop, which sums their depths.
 */
void test_cst_depth_operation_for_inner_nodes(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    std::vector<node_type> nodes;
    {
        // the unfiltered list lives only inside this scope
        std::vector<node_type> candidates;
        generate_nodes_from_random_leaves(cst, times, candidates, x);
        for (const node_type& v : candidates) {
            if (cst.is_leaf(v))
                continue;
            nodes.push_back(v);
        }
    }

    size_type cnt = 0;
    write_R_output("cst","depth of inner nodes","begin",nodes.size(),cnt);
    for (const node_type& v : nodes) {
        cnt += cst.depth(v);
    }
    write_R_output("cst","depth of inner nodes","end",nodes.size(),cnt);
}
//! Benchmarks the pure stepping cost of the CST's DFS iterator.
/*!
 * \param cst   Suffix tree that is traversed.
 * \param times Number of iterator increments; capped at cst.nodes().
 *
 * Before the measured part, the depths of the first min(times, 1000) visited
 * nodes are summed into the counter. The measured loop only increments the
 * iterator; the closing write_R_output call receives that counter plus the
 * depth of the node the iterator ends on.
 */
void test_cst_dfs_iterator(Cst& cst, typename Cst::size_type times=100000)
{
    if (times > cst.nodes())
        times = cst.nodes();
    typedef typename Cst::size_type size_type;

    size_type cnt=0;
    {
        // calc values for cnt
        const size_type prefix_len = std::min(times, static_cast<size_type>(1000));
        typename Cst::const_iterator probe = cst.begin();
        for (size_type k=0; k < prefix_len; ++k, ++probe) {
            cnt += cst.depth(*probe);
        }
    }

    write_R_output("cst","dfs","begin",times,cnt);
    typename Cst::const_iterator it = cst.begin();
    for (size_type remaining=times; remaining > 0; --remaining) {
        ++it;
    }
    write_R_output("cst", "dfs", "end", times, cnt + cst.depth(*it));
}
//! Constructor cst_dfs_const_forward_iterator(const Cst* cst, const value_type node, bool visited=false, bool valid=true):m_visited(visited), m_valid(valid), m_stack_cache(nullptr) { m_cst = cst; m_v = node; if (m_cst == nullptr) { m_valid = false; } else if (m_v == m_cst->root() and !m_visited and m_valid) { // if the iterator equal cst.begin() m_stack_cache = new node_type[cache_size]; m_stack_size = 0; // std::cerr<<"#creating stack "<<m_cst->lb(m_v)<<" "<<m_cst->rb(m_v)<<std::endl; } }
//! Walks the CST with its DFS iterator and sums the id of every visited node.
/*!
 * \param cst    Suffix tree that is traversed.
 * \param times  Number of iterator steps; capped at 2*cst.nodes()-cst.size().
 * \param output If true, every id is additionally written to std::cerr, one per line.
 *
 * The traversal is enclosed by two write_R_output calls ("begin" with a zero
 * counter, "end" with the accumulated id sum).
 */
void test_cst_dfs_iterator_and_id(Cst& cst, typename Cst::size_type times=1000000, bool output=false)
{
    typedef typename Cst::size_type size_type;

    const size_type max_steps = 2*cst.nodes()-cst.size();
    if (times > max_steps) {
        times = max_steps;
    }

    size_type cnt=0;
    write_R_output("cst","dfs and id","begin",times,cnt);
    typename Cst::const_iterator it = cst.begin();
    if (output) {
        // verbose variant: print every id before adding it
        for (size_type step=0; step<times; ++step, ++it) {
            size_type id = cst.id(*it);
            std::cerr << id << std::endl;
            cnt += id;
        }
    } else {
        // silent variant: only accumulate
        for (size_type step=0; step<times; ++step, ++it) {
            cnt += cst.id(*it);
        }
    }
    write_R_output("cst","dfs and id","end",times,cnt);
}
//! Prefix increment of the iterator. iterator& operator++() { if (!m_valid) return *this; if (m_v == m_cst->root() and m_visited) { m_valid = false; return *this; } value_type w; if (!m_visited) { // go down, if possible if (m_cst->is_leaf(m_v)) { w = m_cst->sibling(m_v); // determine sibling of leaf v if (w == m_cst->root()) { // if there exists no right sibling of the leaf v // w = m_cst->parent(m_v); w = parent(); m_visited = true; // go up } } else { // v is not a leaf => go down the tree w = first_child(); } } else { // w = m_cst->sibling(m_v); if (w == m_cst->root()) { // if there exists no right sibling w = parent(); } else { m_visited = false; } } m_v = w; return *this; }
//! Benchmarks Cst::depth on the nodes found on root paths of random leaves.
/*!
 * \param cst   Suffix tree under test.
 * \param times Number of random leaves handed to generate_nodes_from_random_leaves.
 * \param x     Seed handed to generate_nodes_from_random_leaves.
 *
 * The counter reported by the closing write_R_output call is the sum of all
 * queried depths.
 */
void test_cst_depth_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    std::vector<node_type> nodes;
    generate_nodes_from_random_leaves(cst, times, nodes, x);

    size_type cnt = 0;
    write_R_output("cst","depth","begin",nodes.size(),cnt);
    for (const node_type& v : nodes) {
        cnt += cst.depth(v);
    }
    write_R_output("cst","depth","end",nodes.size(),cnt);
}
void test_cst_sl_operation(const Cst& cst, typename Cst::size_type times=500, uint64_t x=17) { typedef typename Cst::size_type size_type; typedef typename Cst::node_type node_type; size_type n = cst.csa.size(); if (times > n) times = n; std::vector<node_type> nodes(times); srand(x); // take \f$ times \f$ random leaves and calculate each parent for (size_type i=0; i<times; ++i) { nodes[i] = cst.parent(cst.select_leaf(rand()%n + 1)); } size_type cnt=0; times = 0; write_R_output("cst","sl","begin",0,cnt); for (size_type i=0; i<nodes.size(); ++i) { node_type v = nodes[i]; // std::cout<<"v="<<cst.lb(v)<<" "<<cst.rb(v)<<std::endl; // size_type d = cst.depth(v); while (v != cst.root()) { // while v is not the root ++cnt; v = cst.sl(v); // follow suffix link // if( cnt < 30 ){ // std::cout<< cnt << " " << cst.lb(v) << " " << cst.rb(v) << " " << cst.depth(v) << std::endl; // } // size_type d2 = cst.depth(v); // if( d != d2+1 ){ // std::cout<<"error at cnt "<<cnt<<" d="<<d<<" d2="<<d2<<std::endl; // } // d = d2; } } write_R_output("cst","sl","end",cnt,cnt); }
void test_cst_lca_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17) { typedef typename Cst::size_type size_type; typedef typename Cst::node_type node_type; // generate \f$2^{19}\f$ random pairs of leafs size_type n = cst.csa.size(); uint64_t mask = (1<<20)-1; std::vector<node_type> nodes(1<<20); srand(x); for (size_type i=0; i < nodes.size(); ++i) { nodes[i] = cst.select_leaf(rand()%n + 1); } size_type cnt=0; write_R_output("cst","lca","begin",times,cnt); for (size_type i=0; i<times; ++i) { node_type v = cst.lca(nodes[(2*i) & mask], nodes[(2*i+1) & mask]); if (v == cst.root()) cnt++; // if(i<30) // std::cout<<"lca("<<cst.lb(nodes[(2*i)&mask])<<","<<cst.lb(nodes[(2*i+1)&mask])<<")=("<<cst.lb(v)<<","<<cst.rb(v)<<")"<<std::endl; } write_R_output("cst","lca","end",times,cnt); }
louds_tree(const Cst& cst, const CstBfsIterator begin, const CstBfsIterator end):m_bv(), m_bv_select1(), m_bv_select0(), bv(m_bv) { bit_vector tmp_bv(4*cst.size(*begin) , 0); // resize the bit_vector to the maximal // possible size 2*2*#leaves in the tree size_type pos = 0; for (CstBfsIterator it = begin; it != end;) { tmp_bv[pos++] = 1; size_type size = it.size(); ++it; pos += it.size()+1-size; } tmp_bv.resize(pos); m_bv = bit_vector_type(std::move(tmp_bv)); util::init_support(m_bv_select1, &m_bv); util::init_support(m_bv_select0, &m_bv); }