예제 #1
0
//! Benchmark: DFS traversal of the CST that sums the depth of every visited inner node.
/*!
 * \param cst    Suffix tree to traverse.
 * \param times  Number of iterator steps; clamped to 2*cst.nodes()-cst.size().
 * \param output If true, every visited inner node is dumped to std::cerr (depth, interval
 *               bounds and, for depths below 60, the characters of its label).
 *
 * A "begin" and an "end" record are emitted through write_R_output; the "end" record
 * carries the accumulated depth sum.
 */
void test_cst_dfs_iterator_and_depth(Cst& cst, typename Cst::size_type times=1000000, bool output=false)
{
    typedef typename Cst::size_type size_type;
    if (times > 2*cst.nodes()-cst.size())
        times = 2*cst.nodes()-cst.size();
    size_type cnt=0;
    write_R_output("cst","dfs and depth","begin",times,cnt);
    typename Cst::const_iterator it = cst.begin();
    if (!output) {
        for (size_type i=0; i<times; ++i, ++it) {
            if (!cst.is_leaf(*it))
                cnt += cst.depth(*it);
        }
    } else {
        for (size_type i=0; i<times; ++i, ++it) {
            if (!cst.is_leaf(*it)) {
                const size_type d = cst.depth(*it);
                std::cerr << d << "-[" << cst.lb(*it) << "," << cst.rb(*it) << "] ";
                if (d < 60) {
                    // Use a distinct, unsigned index: the former `int i` shadowed the step
                    // counter of the enclosing loop and was compared against the unsigned d.
                    for (size_type k=1; k<=d; ++k)
                        std::cerr<< cst.edge(*it, k);
                }
                std::cerr << std::endl;
                cnt += d;
            }
        }
    }
    write_R_output("cst","dfs and depth","end",times,cnt);
}
예제 #2
0
//! Benchmark: child(v, c, char_pos) queries on nodes taken from random leaf-to-root paths.
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of random leaves handed to generate_nodes_from_random_leaves.
 * \param x     Seed handed to generate_nodes_from_random_leaves.
 *
 * The counter reported in the "end" record is the number of queries whose result
 * compared equal to cst.root().
 */
void test_cst_child_operation(const Cst& cst, typename Cst::size_type times=5000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    std::vector<node_type> nodes;
    generate_nodes_from_random_leaves(cst, times, nodes, x);

    // Choose one query character per node from the BWT.
    // The buffer is a std::vector so that it is released even if a query throws
    // (the former new[]/delete[] pair leaked in that case).
    // The index wraps around because the number of collected nodes may exceed the length
    // of the BWT; this assumes the BWT has cst.csa.size() entries -- TODO confirm.
    const size_type bwt_len = cst.csa.size();
    std::vector<unsigned char> letters(nodes.size());
    for (size_type i=0; i<letters.size(); ++i) {
        letters[i] = cst.csa.bwt[i % bwt_len];
    }

    node_type c;  // for child node
    size_type char_pos=0;
    size_type cnt=0;
    write_R_output("cst","child","begin",nodes.size(),cnt);
    for (size_type i=0; i<nodes.size(); ++i) {
        c = cst.child(nodes[i], letters[i], char_pos);
        if (c==cst.root())
            ++cnt;
    }
    write_R_output("cst","child","end",nodes.size(),cnt);
}
예제 #3
0
//! Benchmark: climb from random leaves to the root using parent().
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of random leaves to start from.
 * \param x     Seed passed to srand.
 *
 * The counter reported in the "end" record equals the number of parent() calls made
 * between the "begin" and "end" records.
 */
void test_cst_parent_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    srand(x);
    const size_type num_suffixes = cst.csa.size();

    // Select the start leaves before the "begin" record is written.
    std::vector<node_type> start_leaves(times);
    for (size_type k=0; k<start_leaves.size(); ++k) {
        const size_type leaf_rank = 1 + (rand() % num_suffixes);
        start_leaves[k] = cst.select_leaf(leaf_rank);
    }

    node_type v;
    size_type cnt=0;
    write_R_output("cst","parent","begin",times,cnt);
    for (size_type k=0; k<times; ++k) {
        // First step: from the leaf to its parent.
        v = cst.parent(start_leaves[k]);
        ++cnt;
        // Remaining steps: keep climbing until the root is reached.
        for (; v != cst.root(); ++cnt) {
            v = cst.parent(v);
        }
    }
    write_R_output("cst","parent","end",times,cnt);
}
예제 #4
0
//! Benchmark: matching statistics of S2 against the indexed text, processed right to left.
/*!
 * \param cst Suffix tree (with its CSA) of the indexed text.
 * \param S2  Text whose characters are matched.
 * \param n2  Number of characters of S2 to process.
 *
 * After every loop iteration the current match length q is added to cnt, which is
 * reported in the "end" record.
 */
void test_cst_matching_statistics(const Cst& cst, unsigned char* S2, typename Cst::size_type n2)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    size_type cnt = 0;
    write_R_output("cst","mstats","begin",n2,cnt);
    size_type q  = 0;                       // current match length
    size_type p2 = n2-1;                    // position in S2 (wraps for n2==0, then the loop is skipped)
    size_type i  = 0, j = cst.csa.size()-1; // \f$ \epsilon \f$ matches all suffixes of S1
    while (p2+1 > 0) {
        size_type lb, rb;
        // perform backward search on interval \f$ [i,j] \f$
        size_type size = algorithm::backward_search(cst.csa, i, j, S2[p2], lb, rb);
        if (size > 0) {
            // S2[p2] extends the current match
            q = q + 1;
            i = lb; j = rb;
            p2 = p2 - 1;
        } else if (i==0 and j == cst.csa.size()-1) {
            // [i,j] is the full interval and S2[p2] still does not match: skip the character.
            // The right bound of the full interval is csa.size()-1 (see the initialisation
            // of j); the former comparison against csa.size() made this branch unreachable,
            // and the branch below never advances p2, so the loop could fail to terminate.
            p2 = p2 -1;
        } else {
            // map interval to a node of the cst and calculate parent
            node_type p = cst.parent(cst.node(i, j));
            q = cst.depth(p);   // update match length
            i = cst.lb(p);      // update left bound
            j = cst.rb(p);      // update right bound
        }
        cnt += q;
    }
    write_R_output("cst","mstats","end",n2,cnt);
}
예제 #5
0
	//! Prefix increment of the iterator.
	/*!
	 * An invalid iterator is returned unchanged. Standing on the root invalidates the
	 * iterator. Otherwise the iterator moves to the leftmost leaf below the sibling of
	 * the current node, or to the parent when sibling() yields the root.
	 */
	iterator& operator++()
	{
		if (m_valid) {
			if (m_v == m_cst->root()) {
				// the root is the last node of the traversal
				m_valid = false;
			} else {
				const value_type next_sibling = m_cst->sibling(m_v);
				if (next_sibling == m_cst->root()) {
					// no next right sibling: continue with the parent
					m_v = m_cst->parent(m_v);
				} else {
					// next right sibling exists: descend to the leftmost leaf of its subtree
					m_v = m_cst->leftmost_leaf(next_sibling);
				}
			}
		}
		return *this;
	}
예제 #6
0
 //! Step up one level: shrink the stack and return the parent of the current node.
 /*!
  * The parent is read from m_stack_cache when a cache exists and the new stack size
  * indexes into it; otherwise it is computed with m_cst->parent(m_v).
  */
 inline node_type parent() {
     --m_stack_size; // one level less on the stack
     if (m_stack_cache == nullptr or m_stack_size >= cache_size) {
         // no cache, or the level lies beyond the cached part of the stack
         return m_cst->parent(m_v);
     }
     return m_stack_cache[m_stack_size];
 }
예제 #7
0
//! Append the nodes on the paths from random leaves up to the root to `nodes`.
/*!
 * \param cst   Suffix tree to sample from.
 * \param times Number of random leaves to draw.
 * \param nodes Output vector; for every drawn leaf, the leaf itself and each node reached
 *              by repeated parent() calls (the root included) are appended.
 * \param x     Seed passed to srand.
 */
void generate_nodes_from_random_leaves(const Cst& cst, typename Cst::size_type times, std::vector<typename Cst::node_type>& nodes, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;
    srand(x);
    const size_type num_suffixes = cst.csa.size();
    for (size_type drawn=0; drawn<times; ++drawn) {
        // start at a random leaf and record every node on the way to the root
        node_type v = cst.select_leaf(1+ (rand() % num_suffixes));
        for (;;) {
            nodes.push_back(v);
            if (v == cst.root()) {
                break;
            }
            v = cst.parent(v);
        }
    }
}
예제 #8
0
	//! Prefix increment of the iterator.
	/*!
	 * An invalid iterator is returned unchanged and an empty queue invalidates the
	 * iterator. Otherwise the front node is removed from the queue and its children are
	 * appended, starting with select_child(v, 1) and following sibling() until the
	 * root is returned.
	 */
	iterator& operator++()
	{
		if (m_valid) {
			if (m_queue.empty()) {
				m_valid = false;
			} else {
				const value_type front_node = m_queue.front();
				m_queue.pop();
				// enqueue the children of the removed node from left to right
				for (value_type c = m_cst->select_child(front_node, 1); m_cst->root() != c; c = m_cst->sibling(c)) {
					m_queue.push(c);
				}
			}
		}
		return *this;
	}
예제 #9
0
	//! Step down one level: record the current node on the stack and return its first child.
	/*!
	 * The current node is stored in m_stack_cache only if a cache exists and the current
	 * stack size indexes into it; the stack size grows in either case.
	 */
	inline node_type first_child()
	{
		const bool fits_in_cache = (m_stack_cache != nullptr) and (m_stack_size < cache_size);
		if (fits_in_cache) {
			m_stack_cache[m_stack_size] = m_v; // push node to the stack
		}
		++m_stack_size;
		return m_cst->select_child(m_v, 1);
	}
예제 #10
0
//! Benchmark: select_child(v, 1) on nodes taken from random leaf-to-root paths.
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of random leaves handed to generate_nodes_from_random_leaves.
 * \param x     Seed handed to generate_nodes_from_random_leaves.
 *
 * The counter reported in the "end" record is the number of queries whose result
 * compared equal to cst.root().
 */
void test_cst_1th_child_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    std::vector<node_type> query_nodes;
    generate_nodes_from_random_leaves(cst, times, query_nodes, x);

    size_type cnt=0;
    node_type first;  // result of the current query
    write_R_output("cst","1th_child","begin",query_nodes.size(),cnt);
    for (size_type k=0; k<query_nodes.size(); ++k) {
        first = cst.select_child(query_nodes[k], 1);
        if (first==cst.root()) {
            ++cnt;
        }
    }
    write_R_output("cst","1th_child","end",query_nodes.size(),cnt);
}
예제 #11
0
//! Benchmark: depth() restricted to the non-leaf nodes on random leaf-to-root paths.
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of random leaves handed to generate_nodes_from_random_leaves.
 * \param x     Seed handed to generate_nodes_from_random_leaves.
 *
 * The "end" record carries the sum of all returned depths.
 */
void test_cst_depth_operation_for_inner_nodes(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    std::vector<node_type> inner_nodes;
    {
        // collect all path nodes first, then keep only those that are not leaves
        std::vector<node_type> path_nodes;
        generate_nodes_from_random_leaves(cst, times, path_nodes, x);
        for (const node_type& v : path_nodes) {
            if (cst.is_leaf(v)) {
                continue;
            }
            inner_nodes.push_back(v);
        }
    }

    size_type cnt = 0;
    write_R_output("cst","depth of inner nodes","begin",inner_nodes.size(),cnt);
    for (const node_type& v : inner_nodes) {
        cnt += cst.depth(v);
    }
    write_R_output("cst","depth of inner nodes","end",inner_nodes.size(),cnt);
}
예제 #12
0
//! Benchmark: advance the DFS iterator of the CST `times` steps.
/*!
 * \param cst   Suffix tree to traverse.
 * \param times Number of iterator increments; clamped to cst.nodes().
 *
 * Before the "begin" record is written, the depths of up to the first 1000 visited nodes
 * are summed into cnt. The "end" record carries cnt plus the depth of the node the
 * iterator points to after the increments.
 */
void test_cst_dfs_iterator(Cst& cst, typename Cst::size_type times=100000)
{
    typedef typename Cst::size_type size_type;
    if (times > cst.nodes()) {
        times = cst.nodes();
    }

    size_type cnt=0;
    {
        // calc values for cnt
        const size_type probe_steps = std::min(times, static_cast<size_type>(1000));
        typename Cst::const_iterator probe = cst.begin();
        for (size_type k=0; k<probe_steps; ++k) {
            cnt += cst.depth(*probe);
            ++probe;
        }
    }

    write_R_output("cst","dfs","begin",times,cnt);
    typename Cst::const_iterator it = cst.begin();
    for (size_type remaining=times; remaining>0; --remaining) {
        ++it;
    }
    write_R_output("cst", "dfs", "end", times, cnt + cst.depth(*it));
}
예제 #13
0
        //! Constructor
        /*!
         * \param cst     Suffix tree the iterator walks over; a null pointer yields an
         *                invalid iterator.
         * \param node    Node the iterator initially points to.
         * \param visited Initial value of the visited flag.
         * \param valid   Initial validity of the iterator.
         *
         * A stack cache of cache_size nodes is allocated only when the iterator starts at
         * the root, unvisited and valid, i.e. when it equals cst.begin().
         */
        cst_dfs_const_forward_iterator(const Cst* cst, const value_type node, bool visited=false, bool valid=true):m_visited(visited), m_valid(valid), m_stack_cache(nullptr) {
            m_cst = cst;
            m_v = node;
            // Always start with a defined stack size: parent() and first_child() modify
            // m_stack_size even when no cache was allocated, so leaving it uninitialised
            // on the non-begin paths meant reading an indeterminate value.
            m_stack_size = 0;
            if (m_cst == nullptr) {
                m_valid = false;
            } else if (m_v == m_cst->root() and !m_visited and m_valid) { // if the iterator equal cst.begin()
                m_stack_cache = new node_type[cache_size];
            }
        }
예제 #14
0
//! Benchmark: DFS traversal of the CST that sums id() of every visited node.
/*!
 * \param cst    Suffix tree to traverse.
 * \param times  Number of iterator steps; clamped to 2*cst.nodes()-cst.size().
 * \param output If true, every id is additionally printed to std::cerr, one per line.
 *
 * The "end" record carries the sum of all ids.
 */
void test_cst_dfs_iterator_and_id(Cst& cst, typename Cst::size_type times=1000000, bool output=false)
{
    typedef typename Cst::size_type size_type;
    if (times > 2*cst.nodes()-cst.size()) {
        times = 2*cst.nodes()-cst.size();
    }

    size_type cnt=0;
    write_R_output("cst","dfs and id","begin",times,cnt);
    typename Cst::const_iterator it = cst.begin();
    if (output) {
        // verbose variant: print each id before adding it
        for (size_type step=0; step<times; ++step) {
            const size_type node_id = cst.id(*it);
            std::cerr << node_id << std::endl;
            cnt += node_id;
            ++it;
        }
    } else {
        // silent variant: only accumulate
        for (size_type step=0; step<times; ++step) {
            cnt += cst.id(*it);
            ++it;
        }
    }
    write_R_output("cst","dfs and id","end",times,cnt);
}
예제 #15
0
	//! Prefix increment of the iterator.
	/*!
	 * Advances the iterator by one step. m_visited is false while the iterator moves
	 * downwards or sideways and is set to true when it moves up to a parent.
	 * A result of sibling() equal to the root is treated as "no right sibling".
	 * An invalid iterator is returned unchanged; the iterator becomes invalid when it
	 * is incremented while standing on the root with m_visited set.
	 * \return Reference to this iterator.
	 */
	iterator& operator++()
	{
		// nothing to do for an invalid iterator
		if (!m_valid) return *this;
		// standing on the root with m_visited set: invalidate the iterator
		if (m_v == m_cst->root() and m_visited) {
			m_valid = false;
			return *this;
		}
		value_type w; // node the iterator moves to
		if (!m_visited) { // go down, if possible
			if (m_cst->is_leaf(m_v)) {
				w = m_cst->sibling(m_v);  // determine sibling of leaf v
				if (w == m_cst->root()) { // if there exists no right sibling of the leaf v
										  // the parent() member is called here instead of m_cst->parent(m_v)
					w		  = parent();
					m_visited = true; // go up
				}
			} else { // v is not a leaf => go down the tree
				w = first_child();
			}
		} else { // m_visited is set: continue with the right sibling or move further up
			w = m_cst->sibling(m_v);
			if (w == m_cst->root()) { // if there exists no right sibling
				w = parent();
			} else {
				// the sibling is reached with m_visited cleared
				m_visited = false;
			}
		}
		m_v = w;
		return *this;
	}
예제 #16
0
//! Benchmark: depth() on all nodes of random leaf-to-root paths.
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of random leaves handed to generate_nodes_from_random_leaves.
 * \param x     Seed handed to generate_nodes_from_random_leaves.
 *
 * The "end" record carries the sum of all returned depths.
 */
void test_cst_depth_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    std::vector<node_type> path_nodes;
    generate_nodes_from_random_leaves(cst, times, path_nodes, x);

    size_type cnt = 0;
    write_R_output("cst","depth","begin",path_nodes.size(),cnt);
    for (const node_type& v : path_nodes) {
        cnt += cst.depth(v);
    }
    write_R_output("cst","depth","end",path_nodes.size(),cnt);
}
예제 #17
0
//! Benchmark: follow suffix links from parents of random leaves until the root is reached.
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of start nodes; clamped to cst.csa.size().
 * \param x     Seed passed to srand.
 *
 * The "begin" record reports 0 operations; the "end" record reports the number of sl()
 * calls both as operation count and as counter.
 */
void test_cst_sl_operation(const Cst& cst, typename Cst::size_type times=500, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    const size_type n = cst.csa.size();
    if (times > n) {
        times = n;
    }

    // take \f$ times \f$ random leaves and calculate each parent
    std::vector<node_type> start_nodes(times);
    srand(x);
    for (size_type k=0; k<times; ++k) {
        start_nodes[k] = cst.parent(cst.select_leaf(rand()%n + 1));
    }

    size_type cnt=0;
    times = 0; // times is not read after this point
    write_R_output("cst","sl","begin",0,cnt);
    for (size_type k=0; k<start_nodes.size(); ++k) {
        // one sl() call per iteration, counted before the link is followed
        for (node_type v = start_nodes[k]; v != cst.root(); v = cst.sl(v)) {
            ++cnt;
        }
    }
    write_R_output("cst","sl","end",cnt,cnt);
}
예제 #18
0
//! Benchmark: lca() on pairs of random leaves.
/*!
 * \param cst   Suffix tree to query.
 * \param times Number of lca() queries.
 * \param x     Seed passed to srand.
 *
 * \f$2^{20}\f$ random leaves are drawn, i.e. \f$2^{19}\f$ pairs; query i uses the leaves
 * at positions 2i and 2i+1, both taken modulo \f$2^{20}\f$. The "end" record carries the
 * number of queries whose result compared equal to cst.root().
 */
void test_cst_lca_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17)
{
    typedef typename Cst::size_type size_type;
    typedef typename Cst::node_type node_type;

    const size_type n = cst.csa.size();
    const uint64_t mask = (1<<20)-1;
    std::vector<node_type> leaves(1<<20);
    srand(x);
    for (size_type k=0; k < leaves.size(); ++k) {
        leaves[k] = cst.select_leaf(rand()%n + 1);
    }

    size_type cnt=0;
    write_R_output("cst","lca","begin",times,cnt);
    for (size_type pair=0; pair<times; ++pair) {
        const node_type& left  = leaves[(2*pair) & mask];
        const node_type& right = leaves[(2*pair+1) & mask];
        const node_type ancestor = cst.lca(left, right);
        if (ancestor == cst.root()) {
            cnt++;
        }
    }
    write_R_output("cst","lca","end",times,cnt);
}
예제 #19
0
 louds_tree(const Cst& cst, const CstBfsIterator begin, const CstBfsIterator end):m_bv(), m_bv_select1(), m_bv_select0(), bv(m_bv) {
     bit_vector tmp_bv(4*cst.size(*begin) , 0); // resize the bit_vector to the maximal
     // possible size 2*2*#leaves in the tree
     size_type pos = 0;
     for (CstBfsIterator it = begin; it != end;) {
         tmp_bv[pos++] = 1;
         size_type size = it.size();
         ++it;
         pos += it.size()+1-size;
     }
     tmp_bv.resize(pos);
     m_bv = bit_vector_type(std::move(tmp_bv));
     util::init_support(m_bv_select1, &m_bv);
     util::init_support(m_bv_select0, &m_bv);
 }