예제 #1
0
// Extends a forward search in the compressed suffix tree `cst` by one character.
//
// \param cst       Suffix tree to search in.
// \param v         Current node. Replaced by the result of cst.child(v, c, char_pos)
//                  when the search sits exactly at a node.
// \param d         Current search depth; compared against cst.depth(v) to decide
//                  whether the search is inside an edge or at a node.
// \param c         Character to match next.
// \param char_pos  In/out position. Inside an edge it is advanced with cst.csa.psi;
//                  at a node it is handed to cst.child().
// \return cst.size(v) if the character could be matched, 0 otherwise
//         (including the case d > cst.depth(v)).
typename t_cst::size_type
forward_search(
    const t_cst& cst,
    typename t_cst::node_type& v,
    const typename t_cst::size_type d,
    const typename t_cst::char_type c,
    typename t_cst::size_type& char_pos,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    // Keep whatever type char2comp yields. Forcing the value into an
    // `unsigned char` would truncate it for alphabets with more than 256
    // symbols and corrupt the C[cc] / C[cc+1] range check below.
    auto cc = cst.csa.char2comp[c];
    // c maps to 0 without being the 0 symbol itself: c does not occur in the text.
    if (cc==0 and cc!=c)
        return 0;
    typename t_cst::size_type depth_node = cst.depth(v);
    if (d < depth_node) {         // in an edge, no branching
        char_pos = cst.csa.psi[char_pos];
        // the new position has to fall into the range of suffixes associated with cc
        if (char_pos < cst.csa.C[cc] or char_pos >= cst.csa.C[cc+1])
            return 0;
        return cst.size(v);
    } else if (d == depth_node) { // at a node, branching
        v = cst.child(v, c, char_pos);
        // the root is treated as "no matching child"
        if (v == cst.root())
            return 0;
        else
            return cst.size(v);
    } else {
        return 0;
    }
}
예제 #2
0
// Returns the text associated with node `v` of the suffix tree `cst`.
//
// For the root an instance built as t_rac(0) is returned. For any other node
// the suffix array entry of the leftmost leaf below `v` gives the start
// position, and the range [start, start + cst.depth(v) - 1] is handed to the
// extract overload that works on the compressed suffix array.
t_rac extract(
    const t_cst& cst,
    const typename t_cst::node_type& v,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    const bool at_root = (v==cst.root());
    if (!at_root) {
        // text position of the suffix belonging to the leftmost leaf under v
        typename t_cst::size_type first = cst.csa[cst.lb(v)];
        // last position of the range that is passed on
        typename t_cst::size_type last = first + cst.depth(v) - 1;
        // delegate to the extract method of the compressed suffix array
        return extract<t_rac>(cst.csa, first, last);
    }
    return t_rac(0);
}
예제 #3
0
 // Reads the four members of this index from `in`, one after the other:
 // forward suffix tree, reverse suffix tree, precomputed data, vocabulary.
 // The statement order defines the expected stream layout, so it must not
 // be rearranged.
 void load(std::istream& in)
 {
     // the two suffix trees load themselves through their member function
     m_cst.load(in);
     m_cst_rev.load(in);

     // the remaining members go through the generic sdsl::load helper
     sdsl::load(m_precomputed, in);
     sdsl::load(m_vocab, in);
 }
예제 #4
0
    // Computes N_1+( * ab * ).
    //
    // Assumptions taken over from the original notes (none is checked here):
    //  - `node` already identifies the pattern in the forward tree (m_cst),
    //  - `node_rev` is a node of the reverse tree (m_cst_rev),
    //  - [pattern_begin, pattern_end) covers exactly the pattern of interest.
    // The pattern is dereferenced unconditionally, so it must not be empty.
    uint64_t N1PlusFrontBack(const node_type& node, const node_type& node_rev,
                             pattern_iterator pattern_begin, pattern_iterator pattern_end) const
    {
        auto timer = lm_bench::bench(timer_type::N1PlusFrontBack);
        uint64_t pattern_size = std::distance(pattern_begin, pattern_end);

        // true when the pattern matches a full edge in the CST, i.e. it ends
        // exactly at an inner node
        const bool ends_at_inner_node
            = !m_cst.is_leaf(node) && pattern_size == m_cst.depth(node);
        const bool begins_with_start_sym = (*pattern_begin == PAT_START_SYM);

        if (!ends_at_inner_node) {
            // special case, only one way of extending this pattern to the right
            if (!begins_with_start_sym) {
                /* pattern must be *xyzA -> #P(*xyz*) == N1PlusBack */
                return N1PlusBack(node_rev, pattern_begin, pattern_end);
            }
            if (*(pattern_end - 1) == PAT_END_SYM) {
                /* pattern must be 13xyz41 -> #P(*3xyz4*) == 0 */
                return 0;
            }
            /* pattern must be 13xyzA -> #P(*3xyz*) == 1 */
            return 1;
        }

        if (begins_with_start_sym) {
            return m_cst.degree(node);
        }

        // Visit every child of `node`; the walk stops once sibling() hands
        // back a node whose id equals the id of the root.
        uint64_t total = 0;
        auto root_id = m_cst.id(m_cst.root());
        std::vector<uint64_t> extended(pattern_begin, pattern_end);
        extended.push_back(EOS_SYM); // placeholder slot, overwritten per child
        for (auto child = m_cst.select_child(node, 1); m_cst.id(child) != root_id;
             child = m_cst.sibling(child)) {
            // start every round from the interval of node_rev
            auto lb_rev = m_cst_rev.lb(node_rev);
            auto rb_rev = m_cst_rev.rb(node_rev);

            // symbol on the child's edge right behind the pattern
            uint64_t symbol = m_cst.edge(child, pattern_size + 1);
            assert(symbol != EOS_SYM);
            extended.back() = symbol;

            // find the symbol to the right
            // (which is first in the reverse order)
            backward_search_wrapper(m_cst_rev.csa, lb_rev, rb_rev, symbol, lb_rev, rb_rev);

            total += N1PlusBack(m_cst_rev.node(lb_rev, rb_rev),
                                extended.begin(), extended.end());
        }
        return total;
    }
예제 #5
0
 // Exchanges the contents of this index with those of `a`.
 // Swapping an object with itself is a no-op.
 void swap(index_succinct& a)
 {
     if (this == &a) {
         return;
     }
     // both suffix trees and the vocabulary use their own swap member
     m_cst.swap(a.m_cst);
     m_cst_rev.swap(a.m_cst_rev);
     // the precomputed data is exchanged through std::swap
     std::swap(m_precomputed, a.m_precomputed);
     m_vocab.swap(a.m_vocab);
 }
예제 #6
0
    // Computes N_1+( abc * ).
    //
    // Assumption taken over from the original notes (not checked here):
    // `node` already identifies the pattern in the forward tree.
    // The last pattern symbol is read unconditionally, so the pattern must
    // not be empty.
    uint64_t N1PlusFront(const node_type& node,
                         pattern_iterator pattern_begin, pattern_iterator pattern_end) const
    {
        auto timer = lm_bench::bench(timer_type::N1PlusFront);
        const uint64_t pattern_size = std::distance(pattern_begin, pattern_end);

        // Default: the pattern is part of an edge label, so there is a single
        // way to continue it.
        uint64_t count = 1;
        const bool matches_edge_label
            = !m_cst.is_leaf(node) && pattern_size == m_cst.depth(node);
        if (matches_edge_label) {
            // the pattern ends at an inner node: one continuation per child
            count = m_cst.degree(node);
        }

        // adjust for end of sentence
        const uint64_t last_symbol = *(pattern_end - 1);
        if (last_symbol == PAT_END_SYM) {
            count -= 1;
        }
        return count;
    }
예제 #7
0
    // Writes the four members of this index to `out` and returns the sum of
    // the byte counts reported by the individual serialize calls.
    //
    // A child entry for this object is added below `v` in the structure tree
    // and the accumulated size is recorded on it.
    // The members are written strictly one statement at a time so that the
    // order of the data in the stream is well defined.
    size_type serialize(std::ostream& out, sdsl::structure_tree_node* v = NULL,
                        std::string name = "") const
    {
        sdsl::structure_tree_node* self_node = sdsl::structure_tree::add_child(
            v, name, sdsl::util::class_name(*this));

        size_type total = 0;
        // forward and reverse suffix tree
        total += m_cst.serialize(out, self_node, "CST");
        total += m_cst_rev.serialize(out, self_node, "CST_REV");
        // precomputed data and vocabulary
        total += m_precomputed.serialize(out, self_node, "Precomputed_Stats");
        total += sdsl::serialize(m_vocab, out, self_node, "Vocabulary");

        sdsl::structure_tree::add_size(self_node, total);
        return total;
    }
예제 #8
0
// Extends a forward search in the suffix tree `cst` by the characters in
// [begin, end), one character at a time, using the single-character overload.
//
// \param cst       Suffix tree to search in.
// \param v         Current node; updated by the single-character search.
// \param d         Search depth before the first character; incremented
//                  locally after every matched character.
// \param begin     Start of the pattern.
// \param end       End of the pattern (exclusive).
// \param char_pos  In/out position forwarded to the single-character search.
// \return cst.size(v) for an empty pattern; otherwise the result of the last
//         single-character search, which is 0 as soon as one character fails.
typename t_cst::size_type
forward_search(const t_cst& cst,
               typename t_cst::node_type& v,
               typename t_cst::size_type d,
               t_pat_iter begin,
               t_pat_iter end,
               typename t_cst::size_type& char_pos,
               SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
              )
{
    if (begin==end) {
        return cst.size(v);
    }
    typename t_cst::size_type matches = 0;
    for (t_pat_iter cur = begin; cur != end; ++cur, ++d) {
        matches = forward_search(cst, v, d, *cur, char_pos);
        if (!matches) {
            break; // character could not be matched, give up
        }
    }
    return matches;
}
예제 #9
0
	// Returns the end iterator of the child range: an iterator that wraps
	// the root node of the tree.
	iterator_type end() const
	{
		return iterator_type(m_cst, m_cst->root());
	}
예제 #10
0
	// Returns an iterator that wraps the first child of m_parent
	// (select_child counts children starting at 1).
	iterator_type begin() const
	{
		return iterator_type(m_cst, m_cst->select_child(m_parent, 1));
	}
예제 #11
0
	// Returns the number of children of m_parent, as reported by degree().
	// Declared const like begin(), end() and operator[]: the call only reads
	// members, so it is also usable on const objects.
	size_type	 size() const { return m_cst->degree(m_parent); }
예제 #12
0
	// Returns the child of m_parent at zero-based position `i`.
	node_type operator[](size_type i) const
	{
		// select_child enumerates children starting with 1, not 0
		const size_type one_based = i + 1;
		return m_cst->select_child(m_parent, one_based);
	}
예제 #13
0
	// Pre-increment: moves the iterator to the node that sibling() reports
	// for the current node and returns the iterator itself.
	iterator_type& operator++()
	{
		auto next_node = m_cst->sibling(m_cur_node);
		m_cur_node = next_node;
		return *this;
	}