typename t_cst::size_type forward_search( const t_cst& cst, typename t_cst::node_type& v, const typename t_cst::size_type d, const typename t_cst::char_type c, typename t_cst::size_type& char_pos, SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag() ) { unsigned char cc = cst.csa.char2comp[c]; // check if c occurs in the text of the csa if (cc==0 and cc!=c) // " " " " " " " " " " return 0; typename t_cst::size_type depth_node = cst.depth(v); if (d < depth_node) { // in an edge, no branching char_pos = cst.csa.psi[char_pos]; if (char_pos < cst.csa.C[cc] or char_pos >= cst.csa.C[cc+1]) return 0; return cst.size(v); } else if (d == depth_node) { // at a node, branching v = cst.child(v, c, char_pos); if (v == cst.root()) return 0; else return cst.size(v); } else { return 0; } }
// Returns, as a t_rac, the depth(v) text characters that start at the suffix-array
// entry of v's left bound. For the root an empty container t_rac(0) is returned.
//
//  cst  compressed suffix tree the node belongs to.
//  v    node whose label should be extracted.
t_rac extract(
    const t_cst& cst,
    const typename t_cst::node_type& v,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    typedef typename t_cst::size_type size_type;

    // Nothing to extract for the root.
    if (v == cst.root()) {
        return t_rac(0);
    }

    // Text position of the suffix stored at the left bound of v's interval ...
    const size_type first = cst.csa[cst.lb(v)];
    // ... and the (inclusive) position of the last character of the label.
    const size_type last = first + cst.depth(v) - 1;

    // Delegate the actual extraction to the overload working on the suffix array.
    return extract<t_rac>(cst.csa, first, last);
}
// Computes N_1+( * ab * ) uint64_t N1PlusFrontBack(const node_type& node, const node_type& node_rev, pattern_iterator pattern_begin, pattern_iterator pattern_end) const { auto timer = lm_bench::bench(timer_type::N1PlusFrontBack); // ASSUMPTION: lb, rb already identify the suffix array range corresponding to 'pattern' in // the forward tree // ASSUMPTION: pattern_begin, pattern_end cover just the pattern we're interested in (i.e., // we want N1+ dot pattern dot) uint64_t pattern_size = std::distance(pattern_begin, pattern_end); uint64_t back_N1plus_front = 0; // this is when the pattern matches a full edge in the CST if (!m_cst.is_leaf(node) && pattern_size == m_cst.depth(node)) { if (*pattern_begin == PAT_START_SYM) { return m_cst.degree(node); } auto w = m_cst.select_child(node, 1); auto root_id = m_cst.id(m_cst.root()); std::vector<uint64_t> new_pattern(pattern_begin, pattern_end); new_pattern.push_back(EOS_SYM); while (m_cst.id(w) != root_id) { auto lb_rev_stored = m_cst_rev.lb(node_rev); auto rb_rev_stored = m_cst_rev.rb(node_rev); uint64_t symbol = m_cst.edge(w, pattern_size + 1); assert(symbol != EOS_SYM); new_pattern.back() = symbol; // find the symbol to the right // (which is first in the reverse order) backward_search_wrapper(m_cst_rev.csa, lb_rev_stored, rb_rev_stored, symbol, lb_rev_stored, rb_rev_stored); back_N1plus_front += N1PlusBack(m_cst_rev.node(lb_rev_stored, rb_rev_stored), new_pattern.begin(), new_pattern.end()); w = m_cst.sibling(w); } return back_N1plus_front; } else { // special case, only one way of extending this pattern to the right if (*pattern_begin == PAT_START_SYM && *(pattern_end - 1) == PAT_END_SYM) { /* pattern must be 13xyz41 -> #P(*3xyz4*) == 0 */ return 0; } else if (*pattern_begin == PAT_START_SYM) { /* pattern must be 13xyzA -> #P(*3xyz*) == 1 */ return 1; } else { /* pattern must be *xyzA -> #P(*xyz*) == N1PlusBack */ return N1PlusBack(node_rev, pattern_begin, pattern_end); } } }
// Computes N_1+( abc * ) uint64_t N1PlusFront(const node_type& node, pattern_iterator pattern_begin, pattern_iterator pattern_end) const { auto timer = lm_bench::bench(timer_type::N1PlusFront); // ASSUMPTION: lb, rb already identify the suffix array range corresponding to 'pattern' in // the forward tree uint64_t pattern_size = std::distance(pattern_begin, pattern_end); uint64_t N1plus_front; if (!m_cst.is_leaf(node) && pattern_size == m_cst.depth(node)) { // pattern matches the edge label N1plus_front = m_cst.degree(node); } else { // pattern is part of the edge label N1plus_front = 1; } // adjust for end of sentence uint64_t symbol = *(pattern_end - 1); if (symbol == PAT_END_SYM) { N1plus_front -= 1; } return N1plus_front; }