コード例 #1
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Builds a looping NFA fragment around the child.
	//
	// On return, `s` is the start state reported by the child's own conversion
	// and `e` is a state generated here. The child's end state is linked to `e`
	// by an epsilon transition, and `e` is linked back to `s` by another one.
	// No direct s -> e epsilon is added in this method.
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		// The exit state is generated before the child is converted; that
		// order is kept so states are created in the same sequence as before.
		e = nfa.generateState();

		State innerEnd;
		child->convertToNFA(nfa, s, innerEnd);

		// Leaving the child reaches the exit state ...
		nfa.addTransition(innerEnd, e, Transition::EPSILON);
		// ... and from the exit state the child can be entered again.
		nfa.addTransition(e, s, Transition::EPSILON);
	}
コード例 #2
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Builds an NFA fragment that branches into the `left` and `right` operands.
	//
	// Both operands are converted first (left, then right). Afterwards a new
	// start state `s` and a new end state `e` are generated; `s` gets an epsilon
	// transition to each operand's start, and each operand's end gets an epsilon
	// transition to `e`.
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		State lhsBegin;
		State lhsFinish;
		State rhsBegin;
		State rhsFinish;

		left->convertToNFA(nfa, lhsBegin, lhsFinish);
		right->convertToNFA(nfa, rhsBegin, rhsFinish);

		// The outer states are generated only after both operands exist.
		s = nfa.generateState();
		e = nfa.generateState();

		// Fan out from the shared start ...
		nfa.addTransition(s, lhsBegin, Transition::EPSILON);
		nfa.addTransition(s, rhsBegin, Transition::EPSILON);
		// ... and join again at the shared end.
		nfa.addTransition(lhsFinish, e, Transition::EPSILON);
		nfa.addTransition(rhsFinish, e, Transition::EPSILON);
	}
コード例 #3
0
ファイル: trie_generator.cpp プロジェクト: PutiZL/ironbee
//! Main
int main(int argc, char** argv)
{
    if (argc < 1 || argc > 2) {
        cout << "Usage: trie_generator [<chunk_size>]" << endl;
        cout << "Word list on standard in; one word per line." << endl;
        cout << "Automata on standard out; intermediate format." << endl;
        return 1;
    }

    try {
        size_t chunk_size = 0;
        if (argc == 2) {
            try {
                chunk_size = boost::lexical_cast<size_t>(argv[1]);
            }
            catch (boost::bad_lexical_cast) {
                cerr << "Invalid chunk size: " << argv[1] << endl;
                return 1;
            }
        }

        Automata a;

        list<string> words;
        string s;
        while (cin) {
            getline(cin, s);
            if (! s.empty()) {
                add_word(a, s);
            }
        }

        if (! a.start_node()) {
            cerr << "No automata generator.  Empty input?" << endl;
            return 1;
        }

        breadth_first(a, optimize_edges);
        deduplicate_outputs(a);

        a.metadata()["Output-Type"] = "string";

        write_automata(a, cout, chunk_size);
    }
    catch (const exception& e) {
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    return 0;
}
コード例 #4
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Builds an NFA fragment that chains every entry of `siblings` in order.
	//
	// A new start state `s` and end state `e` are generated up front. Each
	// sibling is converted in turn and attached by an epsilon transition from
	// the end of the previous piece (initially `s`). The end of the last piece
	// (or `s` itself when `siblings` is empty) is linked to `e`.
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		s = nfa.generateState();
		e = nfa.generateState();

		// `tail` is the state the next piece has to be attached to.
		State tail = s;
		for (const auto& sibling : siblings)
		{
			State pieceBegin;
			State pieceFinish;
			sibling->convertToNFA(nfa, pieceBegin, pieceFinish);
			nfa.addTransition(tail, pieceBegin, Transition::EPSILON);
			tail = pieceFinish;
		}

		nfa.addTransition(tail, e, Transition::EPSILON);
	}
コード例 #5
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Parses the expression supplied through `input` and builds its NFA in `nfa`.
	//
	// `nfa` is always cleared first. If the reader's first peek() is 0 nothing
	// else happens and `nfa` stays cleared. Otherwise parseRE() is run; if
	// peek() is still non-zero afterwards a ParseError is thrown. On success the
	// parsed tree is converted into `nfa` and its start/terminate states are set.
	Parser(typename Encode<E>::PointerType input, Automata& nfa)
		: reader(input)
	{
		nfa.clear();

		if (reader.peek() != 0)
		{
			auto syntaxTree = parseRE();

			// parseRE() must have consumed everything up to the 0 marker.
			if (reader.peek() != 0)
			{
				throw ParseError();
			}

			State first;
			State last;
			syntaxTree->convertToNFA(nfa, first, last);
			nfa.setStart(first);
			nfa.setTerminate(last);
		}
	}
コード例 #6
0
ファイル: trie_generator.cpp プロジェクト: PutiZL/ironbee
/**
 * Insert word @a s into the trie stored in automata @a a.
 *
 * Creates the start node if @a a has none, follows edges already present for
 * as long as they match @a s, then appends one new node per remaining byte.
 * The final node receives an output holding the bytes of @a s.  If that node
 * already has an output, a warning is written to standard error and the node
 * is left unchanged.
 *
 * @param[in,out] a Automata to extend.
 * @param[in]     s Word to add.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = boost::make_shared<Node>();
    }

    const size_t length = s.length();
    node_p cursor = a.start_node();
    size_t pos = 0;

    // Phase 1: walk the part of the word that is already in the trie.
    for (; pos < length; ++pos) {
        node_p child = find_next(cursor, static_cast<uint8_t>(s[pos]));
        if (! child) {
            break;
        }
        cursor = child;
    }

    // Phase 2: grow a fresh chain of nodes for the remaining bytes.
    for (; pos < length; ++pos) {
        const uint8_t byte = s[pos];

        cursor->edges().push_back(Edge());
        Edge& fresh = cursor->edges().back();
        fresh.target() = boost::make_shared<Node>();
        fresh.add(byte);
        cursor = fresh.target();
    }

    // The node reached at the end of the word may carry only one output here.
    if (cursor->first_output()) {
        cerr << "Warning: Duplicate word: " << s << endl;
        return;
    }

    byte_vector_t data;
    copy(s.begin(), s.end(), back_inserter(data));

    output_p output = boost::make_shared<Output>(data);
    cursor->first_output() = output;
}
コード例 #7
0
ファイル: trie_generator.cpp プロジェクト: niubl/ironbee
/**
 * Insert word @a s into the trie stored in automata @a a.
 *
 * Creates the start node if @a a has none, follows edges already present for
 * as long as they match @a s, then appends one new node per remaining byte.
 * The final node must not have an output yet (checked with @c assert); it
 * receives a new output whose content is whatever BufferAssembler produces
 * for a single @c uint32_t with value 1.
 *
 * @param[in,out] a Automata to extend.
 * @param[in]     s Word to add.
 */
void add_word(Automata& a, const string& s)
{
    if (! a.start_node()) {
        a.start_node() = make_shared<Node>();
    }

    const size_t length = s.length();
    node_p cursor = a.start_node();
    size_t pos = 0;

    // Phase 1: walk the part of the word that is already in the trie.
    for (; pos < length; ++pos) {
        node_p child = find_next(cursor, static_cast<uint8_t>(s[pos]));
        if (! child) {
            break;
        }
        cursor = child;
    }

    // Phase 2: grow a fresh chain of nodes for the remaining bytes.
    for (; pos < length; ++pos) {
        const uint8_t byte = s[pos];

        cursor->edges().push_back(Edge());
        Edge& fresh = cursor->edges().back();
        fresh.target() = make_shared<Node>();
        fresh.add(byte);
        cursor = fresh.target();
    }

    // Adding the same word twice is treated as a programming error.
    assert(! cursor->first_output());
    output_p output = make_shared<Output>();
    cursor->first_output() = output;

    // Assemble the output payload in a scratch buffer, then copy it over.
    IronAutomata::buffer_t scratch;
    IronAutomata::BufferAssembler assembler(scratch);
    assembler.append_object(uint32_t(1));

    output->content().assign(scratch.begin(), scratch.end());
}
コード例 #8
0
ファイル: trie_generator.cpp プロジェクト: niubl/ironbee
//! Main
int main(int argc, char** argv)
{
    if (argc < 1 || argc > 2) {
        cout << "Usage: trie_generator [<chunk_size>]" << endl;
        return 1;
    }

    try {
        size_t chunk_size = 0;
        if (argc == 2) {
            chunk_size = boost::lexical_cast<size_t>(argv[1]);
        }

        Automata a;

        list<string> words;
        string s;
        while (cin) {
            getline(cin, s);
            if (! s.empty()) {
                add_word(a, s);
            }
        }

        assert(a.start_node());

        breadth_first(a, optimize_edges);
        deduplicate_outputs(a);

        a.metadata()["Output-Type"] = "string";

        write_automata(a, cout, chunk_size);
    }
    catch (const exception& e) {
        cerr << "Error: " << e.what() << endl;
        return 1;
    }

    return 0;
}
コード例 #9
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Chains `count` freshly converted copies of the child between a new start
	// state `s` and a new end state `e`.
	//
	// Returns false, leaving `s` and `e` untouched and `nfa` unmodified, when
	// `count` is exactly 0; returns true otherwise. When `addEps` is set, the
	// state in front of every copy additionally gets an epsilon transition
	// straight to `e`.
	bool chainConcatenation(Automata& nfa, int count, bool addEps, State& s, State& e) const
	{
		if (0 == count)
		{
			return false;
		}

		s = nfa.generateState();
		e = nfa.generateState();

		// `tail` is the state the next copy has to be attached to.
		State tail = s;
		for (int built = 0; built < count; ++built)
		{
			State copyBegin;
			State copyFinish;
			child->convertToNFA(nfa, copyBegin, copyFinish);
			nfa.addTransition(tail, copyBegin, Transition::EPSILON);
			if (addEps)
			{
				// Shortcut to the end from in front of this copy.
				nfa.addTransition(tail, e, Transition::EPSILON);
			}
			tail = copyFinish;
		}

		nfa.addTransition(tail, e, Transition::EPSILON);
		return true;
	}
コード例 #10
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Converts the child into `nfa` and reuses the child's own start and end
	// states as this node's `s` and `e`; no additional states are generated.
	// An epsilon transition from `s` to `e` is then added alongside the child's
	// fragment (presumably so the child can be skipped entirely -- confirm
	// against the node type this method belongs to).
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		child->convertToNFA(nfa, s, e);
		nfa.addTransition(s, e, Transition::EPSILON);
	}
コード例 #11
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Generates a new start state `s` and a new end state `e` in `nfa` and
	// connects them with a single Transition::WILDCARD transition.
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		s = nfa.generateState();
		e = nfa.generateState();
		nfa.addTransition(s, e, Transition::WILDCARD);
	}
コード例 #12
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Generates a new start state `s` and a new end state `e` in `nfa` and
	// connects them with a single transition labelled by this node's `rangeSet`.
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		s = nfa.generateState();
		e = nfa.generateState();
		nfa.addTransition(s, e, rangeSet);
	}
コード例 #13
0
ファイル: experiment.cpp プロジェクト: heleifz/HRegex
	// Builds the NFA fragment for a counted repetition of the child.
	//
	// maxCount == -1 marks an open upper bound ({count,} and {,}): a chain of
	// minCount copies (if chainConcatenation produced one) is followed by a
	// KleenNode built over the child. Otherwise the bounds must satisfy
	// 0 <= minCount <= maxCount ({count}, {count1,count2}, {,count2}): a chain
	// of minCount copies is followed by a chain of (maxCount - minCount) copies
	// built with the shortcut-to-end option enabled. If neither chain is
	// produced, `s` and `e` are two new states joined by one epsilon
	// transition. Any other combination of bounds throws ParseError.
	void convertToNFA(Automata& nfa, State& s, State& e) const
	{
		// Open upper bound.
		if (maxCount == -1)
		{
			State prefixS;
			State prefixE;
			const bool hasPrefix = chainConcatenation(nfa, minCount, false, prefixS, prefixE);

			State loopS;
			State loopE;
			KleenNode(child).convertToNFA(nfa, loopS, loopE);

			if (hasPrefix)
			{
				nfa.addTransition(prefixE, loopS, Transition::EPSILON);
			}
			s = hasPrefix ? prefixS : loopS;
			e = loopE;
			return;
		}

		// Closed upper bound: reject inconsistent limits before building.
		if (minCount > maxCount || minCount < 0)
		{
			throw ParseError();
		}

		State requiredS; State requiredE;
		State extraS; State extraE;
		const bool hasRequired = chainConcatenation(nfa, minCount, false, requiredS, requiredE);
		const bool hasExtra = chainConcatenation(nfa, maxCount - minCount, true, extraS, extraE);

		if (!(hasRequired || hasExtra))
		{
			// Neither chain exists: fall back to a bare epsilon fragment.
			s = nfa.generateState();
			e = nfa.generateState();
			nfa.addTransition(s, e, Transition::EPSILON);
			return;
		}

		// The fragment starts at the first chain that exists and ends at the
		// last chain that exists.
		s = hasRequired ? requiredS : extraS;
		e = hasExtra ? extraE : requiredE;

		if (hasRequired && hasExtra)
		{
			nfa.addTransition(requiredE, extraS, Transition::EPSILON);
		}
	}
コード例 #14
0
ファイル: test_intermediate.cpp プロジェクト: PutiZL/ironbee
// Very basic test; more significant testing will be done by end to end tests.
//
// Round trip: builds a small automata in memory, serializes it with
// write_automata() into a stringstream, reads it back through AutomataReader
// and checks that what was read matches what was built.
TEST(TestIntermediate, Writer)
{
    using namespace IronAutomata::Intermediate;

    stringstream s;

    // Build and serialize.  Scoped so none of the objects created here can be
    // confused with the ones read back below.
    {
        Automata a;
        node_p   node   = a.start_node()       = boost::make_shared<Node>();
        output_p output = node->first_output() = boost::make_shared<Output>();

        // First output of the start node: the two bytes '7', '3'.
        output->content().push_back('7');
        output->content().push_back('3');

        // Second output, chained after the first: the single byte '9'.
        output_p other_output = output->next_output() = boost::make_shared<Output>();
        other_output->content().push_back('9');

        // One edge out of the start node, on value '5', to a fresh node.
        node->edges().push_back(Edge());
        Edge& edge = node->edges().back();

        // other_node is not referenced again; the new node is left as created.
        node_p other_node = edge.target() = boost::make_shared<Node>();
        edge.add('5');

        write_automata(a, s);
        // NOTE(review): this moves the put position; the reader below consumes
        // the stream as an istream -- confirm seekg(0) was not intended.
        s.seekp(0);
    }

    // Read back.
    IronAutomata::ostream_logger logger(cout);
    AutomataReader reader(logger);

    bool success = reader.read_from_istream(s);
    EXPECT_TRUE(success);
    EXPECT_TRUE(reader.clean());
    EXPECT_TRUE(reader.success());

    Automata a = reader.automata();

    // Flags such as no_advance_no_output, advance_on_default and advance were
    // never set while building, so the values expected below are presumably
    // the defaults of Automata/Node/Edge -- TODO confirm.
    EXPECT_FALSE(a.no_advance_no_output());
    // ASSERT_* is used wherever a failure would make the next lines
    // dereference a null pointer or index out of range.
    ASSERT_TRUE(bool(a.start_node()));
    node_p node = a.start_node();
    EXPECT_TRUE(node->advance_on_default());
    ASSERT_TRUE(bool(node->first_output()));
    ASSERT_EQ(1UL, node->edges().size());
    EXPECT_FALSE(node->default_target());
    // First output: '7', '3'.
    output_p output = node->first_output();
    ASSERT_EQ(2UL, output->content().size());
    EXPECT_EQ('7', output->content()[0]);
    EXPECT_EQ('3', output->content()[1]);
    // Second output: '9', and the chain ends there.
    ASSERT_TRUE(bool(output->next_output()));
    output = output->next_output();
    ASSERT_EQ(1UL, output->content().size());
    EXPECT_EQ('9', output->content()[0]);
    EXPECT_FALSE(output->next_output());
    // The single edge carries exactly the value '5' and has a target.
    Edge& edge = node->edges().front();
    EXPECT_TRUE(edge.advance());
    ASSERT_TRUE(bool(edge.target()));
    ASSERT_EQ(1UL, edge.size());
    EXPECT_EQ('5', *edge.begin());
    // The target node has no edges, no default target and no outputs.
    node = edge.target();
    EXPECT_FALSE(node->default_target());
    EXPECT_TRUE(node->edges().empty());
    EXPECT_TRUE(node->advance_on_default());
    EXPECT_FALSE(node->first_output());
}
コード例 #15
0
int main(int argc, char ** argv)
{
	if (argc != 2) return -1;
	char * search = argv[1];
	Automata * a = new Automata();
#if 1
	a->add_string("automata");
	a->add_string("tomata");
	a->add_string("tomi");
	a->add_string("automata");
	a->add_string("tomato");
	a->add_string("mata");
	a->add_string("amatti");
	a->add_string("car");
	a->add_string("application");
	a->add_string("bridge");
	a->add_string("cludge");
	a->add_string("ban");
	a->add_string("sandwich");
	a->add_string("tomi");
	a->add_string("back");
	a->add_string("stack");
	a->add_string("geek");
	a->add_string("beep");
	a->add_string("deep");
	a->add_string("zip");
	a->add_string("combined");
	a->add_string("goody");
	a->add_string("Automata constructed");
#endif
	a->add_string("9cf386a6cbbecdb999fd98ec89ea9ebecaa2cbb898fe97fb9eb0");
	a->add_string("96f98cacc1b4c7b393f792e683e094b4c0a8c1b292f49df194ba");
	a->add_string("4c2356761b6e1d69492d483c593a4e6e1a721b68482e472b4e60");
	a->add_string("5c5c906f4a46662626d9fcfcdc9c9c634642622222ddf8f0d090");
	a->add_string("b95a010000be");
	cout<<"Automata constructed \n";
	a->construct_fail_links();
	cout<<"Fail links constructed \n";
	cout<<"Match a string "<<search<<" returned : "<< a->is_match(search)<<endl;
	return 0;
}
コード例 #16
0
ファイル: main.cpp プロジェクト: jackiesteed/RegexCpp
/**
 * Interactive command loop of RegexCpp.
 *
 * Prints a banner, then repeatedly reads one line from standard input and
 * dispatches on its first space-separated token:
 *   exit                    leave the loop
 *   match <pattern> <text>  parse the pattern, build an automata, run Match()
 *   debug [on|off]          show or change the debug flag of parser/automata
 *   clear                   run system("cls")
 * Blank lines are ignored. The loop also ends when standard input is
 * exhausted or can no longer be read.
 *
 * @return always 0.
 */
int main(int argc, char* argv[])
{

	cout << "=============================================================================="<< endl;
	cout << "				 RegexCpp" << endl;
	cout << "  0. exit: Quit the program" << endl;
	cout << "  1. match <pattern> <text>: Do regex string matching" << endl;
	cout << "  2. debug <on|off>: Show running log or not, default not" << endl;
	cout << "  3. clear: Clear the screen" << endl;
	cout << "==============================================================================" << endl;
	char* cmdline = new char[MAXN];
	Parser parser;
	Automata automata;

	while(true)
	{
		cout << ">>>>";
		if(!cin.getline(cmdline, MAXN))
		{
			// End of input (or an unreadable stream): nothing more will come.
			if(cin.eof() || cin.bad())
			{
				break;
			}
			// Otherwise the line did not fit into the buffer: recover by
			// discarding the remainder of that line instead of failing on
			// every following read.
			cin.clear();
			char discarded;
			while(cin.get(discarded) && discarded != '\n')
			{
			}
			if(!cin)
			{
				break;
			}
			cout << red << "Command line too long" << flushcolor;
			continue;
		}

		// strtok() yields NULL for an empty or all-space line; passing that
		// to strcmp() would be undefined behaviour.
		char* cmd = strtok(cmdline, " ");
		if(cmd == NULL)
		{
			continue;
		}

		if(0 == strcmp(cmd, "exit"))
		{
			break;
		}
		if(0 == strcmp(cmd, "match"))
		{
			char* pattern = strtok(NULL, " ");
			char* text = strtok(NULL, " ");
			if(pattern == NULL || text == NULL)
			{
				cout << "Incomplete parameters for [match] command " << endl;
				continue;
			}
			Node* root = parser.Parse(pattern); // parse tree
			if(root == NULL)
			{
				cout << red << "Error parsing the pattern!!" << flushcolor;
				continue;
			}

			State* start = automata.CreateAutomata(root);
			// The return value was stored in an unused local before and is
			// still not reported here; presumably Match() prints its own
			// result -- TODO confirm.
			automata.Match(start, text);
		}
		else if(0 == strcmp(cmd, "debug"))
		{
			char* para = strtok(NULL, " ");
			if(para == NULL)
			{
				// No argument: report the current setting.
				if(parser.GetDebug())
					cout << yellow << "Debug mode is on " << flushcolor;
				else
					cout << yellow << "Debug mode is off " << flushcolor;
				continue;
			}
			if(0 == strcmp(para, "on"))
			{
				parser.SetDebug(true);
				automata.SetDebug(true);

			}
			else if(0 == strcmp(para, "off"))
			{
				parser.SetDebug(false);
				automata.SetDebug(false);
			}
			else
			{
				cout << red << "---------------------- Invalid parameters for [debug] command-------------------" << flushcolor;
			}
		}
		else if(0 == strcmp(cmd, "clear"))
		{
			system("cls");
		}
		else
		{
			cout << red << "--------------------------------Unknown command-------------------------------" << flushcolor;
		}
	}

	// Single exit point so the line buffer is always released.
	delete[] cmdline;
	return 0;
}