Exemplo n.º 1
0
// Computes, for every state reachable from RootState, the length of the
// longest path from RootState to that state and stores it in lens[state].
//
// The automaton reachable from RootState is expected to be acyclic and to
// have exactly one state without outgoing transitions (the "InverseRoot"),
// which must be a final state.
//
// dfa       : automaton to analyse (only read).
// RootState : state from which paths are measured.
// lens      : output, resized to dfa.total_states() with 0 as fill value.
//             NOTE(review): entries already present in lens before the call
//             are presumably kept by resize() -- pass an empty vector.
//
// Throws std::logic_error when there is no InverseRoot or more than one,
// when InverseRoot is not final, when a transition leads back to RootState,
// or when a cycle is found. Throws std::runtime_error on an unexpected
// internal color value.
void
ComputeLongestPathLength1( const DFA& dfa
						 , typename DFA::state_id_t RootState
						 , valvec<LenType>& lens
						 )
{
	typedef typename DFA::state_id_t state_id_t;
	typedef typename DFA::transition_t transition_t;
	// color[s]: 0 = unseen, 1 = seen in pass 1, 2 = seen in pass 2,
	//           3 = queued in pass 3, 4 = being expanded, 5 = finished
	valvec<unsigned char> color(dfa.total_states(), 0);
	valvec<state_id_t> stack;
	valvec<state_id_t> index(dfa.total_states()+1, 0);
	valvec<state_id_t> backlink;
	state_id_t InverseRoot = DFA::null_state;

	// to reduce memory usage, the reversed graph is stored as one flat
	// array (backlink) plus an offset table (index)
	// first pass traverse dfa from RootState:
	//   calculate the number of incoming transitions of each state
	//   index[state_id+1] = number of incoming transitions of state_id
	stack.push_back(RootState);
	color[RootState] = 1;
	while (!stack.empty()) {
		state_id_t parent = stack.back(); stack.pop_back();
		dfa.for_each_dest(parent,
			[&](state_id_t, const transition_t& t) {
				state_id_t child = t;
				if (color[child] < 1) {
					color[child] = 1;
					stack.push_back(child);
				}
				index[child+1]++;
			});
		if (!dfa.has_children(parent)) {
			// a state without outgoing transitions: must be unique
			assert(DFA::null_state == InverseRoot);
			if (DFA::null_state != InverseRoot) {
				throw std::logic_error("ComputeLongestPathLength: more than one InverseRoot");
			}
			InverseRoot = parent;
		}
	}
	assert(DFA::null_state != InverseRoot);
	if (DFA::null_state == InverseRoot) {
		throw std::logic_error("ComputeLongestPathLength: not found InverseRoot");
	}
	if (!dfa.is_term(InverseRoot)) {
		throw std::logic_error("ComputeLongestPathLength: InverseRoot is not final state");
	}
	// The in-degree of state s lives in index[s+1] until the prefix sum
	// below, so the in-degree of RootState is index[RootState+1].
	if (0 != index[RootState+1]) {
		throw std::logic_error("ComputeLongestPathLength: found back link to RootState");
	}
	// prefix sum: index[s] becomes the offset of the first backlink of s
	for (size_t i = 2; i < index.size(); ++i) index[i] += index[i-1];

	// second pass traverse dfa from RootState:
	//   backlink[index[s] ... index[s+1]) is the backlinks of state s
	stack.push_back(RootState);
	color[RootState] = 2;
	backlink.resize_no_init(index.back());
	{
		// index2[s] is the next free slot inside the backlink range of s
		valvec<state_id_t> index2 = index;
		while (!stack.empty()) {
			state_id_t parent = stack.back(); stack.pop_back();
			dfa.for_each_dest(parent,
				[&](state_id_t, const transition_t& t) {
					state_id_t child = t;
					if (color[child] < 2) {
						color[child] = 2;
						stack.push_back(child);
					}
					backlink[index2[child]++] = parent;
				});
		}
	}

	// traverse the inversed graph structure of dfa
	//   compute longest_path_length of state s from RootState
	// In this pass "parent" is a state and "child" is one of its
	// predecessors in the original dfa. A state stays on the stack while it
	// is expanded (color 4) and is finalized (color 5) only after all of
	// its predecessors are finished.
	lens.resize(dfa.total_states(), 0);
	stack.push_back(InverseRoot);
	color[InverseRoot] = 3;
	while (!stack.empty()) {
		state_id_t parent = stack.back();
		switch (color[parent]) {
		default:
			assert(0);
			throw std::runtime_error("ComputeLongestPathLength: unexpected 1");
		case 3: {
			size_t beg = index[parent+0];
			size_t end = index[parent+1];
			color[parent] = 4;
			for (size_t i = beg; i < end; ++i) {
				state_id_t child = backlink[i];
				switch (color[child]) {
				default:
					assert(0);
					throw std::runtime_error("ComputeLongestPathLength: unexpected 2");
					break;
				case 2: // not touched
					stack.push_back(child);
					color[child] = 3;
					break;
				case 3: // queued by another state, not expanded yet
					// Push it again so that it is finished before parent
					// is finalized; the older stack entry is discarded
					// later by the outer 'case 5'.
					stack.push_back(child);
					break;
				case 4: // back edge
					if (child == parent)
						throw std::logic_error("ComputeLongestPathLength: found a self-loop");
					else
						throw std::logic_error("ComputeLongestPathLength: found a non-self-loop");
				case 5: // cross edge
					break;
				}
			}
			break; }
		case 4: {
			// all predecessors are finished: take the longest one plus 1
			size_t beg = index[parent+0];
			size_t end = index[parent+1];
			for (size_t i = beg; i < end; ++i) {
				state_id_t child = backlink[i];
				assert(5 == color[child]);
				lens[parent] = std::max(lens[parent], lens[child] + 1);
			}
			color[parent] = 5;
			stack.pop_back();
			break; }
		case 5:
			// stale duplicate of a state that was finished through a
			// later stack entry: drop it
			stack.pop_back();
			break;
		}
	}
}
Exemplo n.º 2
0
// Modifies the automaton A in place: walks A breadth-first from Root and,
// driven by a per-state link table S and a per-state length table L, adds or
// redirects transitions, clones states and sets additional final-state bits.
// NOTE(review): this looks like a suffix-automaton style construction
// applied to an existing DFA -- confirm the intended algorithm.
//
// A    : automaton to modify.
// Root : start state of the traversal (defaults to initial_state).
void CreateSuffixDFA(DFA& A, typename DFA::state_id_t Root = initial_state) {
	typedef typename DFA::state_id_t state_id_t;
	typedef typename DFA::transition_t  trans_t;
	// F  : final states discovered by the traversal
	// L  : per-state length, filled by ComputeLongestPathLength below
	// Q1 : states of the current traversal level, Q2: states of the next one
	// S  : per-state link, initially Root for every state
	valvec<state_id_t> F, L, Q1, Q2, S(A.total_states(), Root);
	// the (single) discovered state that has no outgoing transitions
	state_id_t MostFinal = A.null_state;
	// Root itself has no link; null_state terminates link chains
	S[Root] = A.null_state;
	// SuffixNext(p, q, c): starting at p, follow the S links until Root is
	// reached, adding a transition on c to q at every state that has none.
	// If a state already has a transition on c (target t), continue at Next.
	auto SuffixNext = [&](state_id_t p, state_id_t q, byte_t c) {
		state_id_t t = A.null_state;
		while (p != Root) {
			t = A.state_move(p, c);
			if (A.null_state == t) {
				A.add_move(p, q, c);
				p = S[p];
			} else
				goto Next;
		}
		// Reached Root without meeting an existing transition on c.
		// Root's own transition on c is not looked up before add_move.
		assert(A.null_state == t);
		A.add_move(Root, q, c);
		S[q] = Root;
		return;
	Next:
		// p already has a transition on c, leading to t
		if (L[p] + 1 == L[t] && t != q) {
			S[q] = t;
		} else {
			// r is t itself when t == q, otherwise a fresh clone of t
			state_id_t r;
			if (t == q) {
				r = t;
			} else {
				r = A.clone_state(t);
				// grow the side tables to cover the cloned state
				L.resize(A.total_states(), 0);
				S.resize(A.total_states(), Root);
				S[q] = r;
			}
			S[r] = S[t];
			// NOTE(review): after the line above S[r] == S[t], so the next
			// assignment has no effect; possibly 'S[t] = r' was intended --
			// confirm against the reference algorithm.
			S[t] = S[r];
			L[r] = L[p] + 1;
			assert(A.state_move(p, c) != A.null_state);
			// Redirect transitions on c to r while walking the S links.
			// NOTE(review): the result of state_move(p, c) is used as an
			// index into L without a null_state check for the states
			// visited after the first one -- confirm it cannot be null.
			while (A.null_state != p &&
				   L[A.state_move(p, c)] >= L[r])
		   	{
				A.set_move(p, r, c);
				p = S[p];
			}
		}
	};
	// SuffixFinal(p): follow the S links starting after p and set the
	// final-state bit on each state, stopping at null_state or at the first
	// state that is already final.
	auto SuffixFinal = [&](state_id_t p) {
		while (A.null_state != (p = S[p]) && !A.is_term(p))
			A.set_term_bit(p);
	};
	// presumably fills L[s] with the longest path length from Root to s;
	// the function is defined elsewhere -- verify
	ComputeLongestPathLength(A, Root, L);
	// debug dump of the computed lengths
	// NOTE(review): i is a size_t but is printed with %zd (the matching
	// conversion is %zu), and %u assumes state_id_t is unsigned int.
	for (size_t i = 0; i < L.size(); ++i) {
		printf("L[%04zd] = %u\n", i, L[i]);
	}
	// (label, source state) of every transition that discovered MostFinal
	valvec<CharTarget<state_id_t> > MostFinal_src;

	A.resize_states(A.total_states());
	Q1.push_back(Root);
	// one bit per state: set once the state has been discovered
	// NOTE(review): sized before SuffixNext clones any state -- confirm that
	// ids of cloned states are never tested against it.
	simplebitvec mark(A.total_states());
//	valvec<CharTarget<state_id_t> > children;
	L.resize(A.total_states());
	// level-by-level traversal: Q1 is the current level, Q2 collects the next
	while (!Q1.empty()) {
		for (state_id_t p0 : Q1) {
//			children.erase_all();
			// NOTE(review): SuffixNext can add/redirect transitions and
			// clone states of A while for_each_move is iterating over A --
			// confirm that this is safe for the DFA implementation in use.
			A.for_each_move(p0,[&](state_id_t p,const trans_t& t,byte_t c){
				state_id_t q = t;
				bool q_has_children = A.has_children(q);
				if (mark.is0(q)) {
					// first time q is seen
					if (!q_has_children) {
						// q has no outgoing transitions: remember it and the
						// transition that led to it; it is handled after
						// the traversal
						assert(A.null_state == MostFinal);
						assert(A.is_term(q));
						MostFinal_src.emplace_back(c, p);
						MostFinal = q;
					}
					if (A.is_term(q))
						F.push_back(q);
					mark.set1(q);
					Q2.push_back(q);
				}
				if (q_has_children)
					SuffixNext(p, q, c);
			});
		}
		Q1.swap(Q2);
		Q2.erase_all();
	}
	assert(A.null_state != MostFinal);
	// process the deferred transitions into MostFinal
	for (auto ct : MostFinal_src) {
		state_id_t src = ct.target;
		SuffixNext(src, MostFinal, ct.ch);
		SuffixFinal(MostFinal);
	}
	// propagate the final-state bit along the links of every final state
	for (state_id_t p : F)
	   	SuffixFinal(p);
}