Ejemplo n.º 1
0
/**
 * Builds the DFA engine for a small-write matcher from \p rdfa.
 *
 * \param rdfa          Raw DFA; leading dots are removed from it and it may be
 *                      pruned in place.
 * \param roseQuality   Forwarded to is_slow().
 * \param cc            Compile context; supplies the grey-box limits read here.
 * \param rm            Report manager, forwarded to getDfa().
 * \param start_offset  Out: receives the value returned by
 *                      remove_leading_dots().
 * \param small_region  Out: receives cc.grey.smallWriteLargestBufferBad when
 *                      is_slow() returns true, otherwise
 *                      cc.grey.smallWriteLargestBuffer. Left unwritten if the
 *                      first DFA build fails.
 *
 * \return The built engine with queueIndex set to 0, or nullptr when the DFA
 *         cannot be built, when the slow-path region does not extend past the
 *         start offset, when pruning leaves the anchored start state dead, or
 *         when the engine exceeds the configured size limits.
 */
static
aligned_unique_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
                                   const CompileContext &cc,
                                   const ReportManager &rm, u32 *start_offset,
                                   u32 *small_region) {
    *start_offset = remove_leading_dots(rdfa);

    // Unleash the McClellan!
    set<dstate_id_t> accel_states;
    auto nfa = getDfa(rdfa, cc, rm, accel_states);
    if (!nfa) {
        DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
        return nullptr;
    }

    if (!is_slow(rdfa, accel_states, roseQuality)) {
        *small_region = cc.grey.smallWriteLargestBuffer;
    } else {
        DEBUG_PRINTF("is slow\n");
        *small_region = cc.grey.smallWriteLargestBufferBad;
        if (*small_region <= *start_offset) {
            return nullptr;
        }

        // Budget handed to the pruner: the slow-path region minus the offset
        // reported by remove_leading_dots(). Guaranteed non-zero by the check
        // above.
        const u32 budget = *small_region - *start_offset;
        if (prune_overlong(rdfa, budget)) {
            // The raw DFA was reported as pruned: rebuild the engine from it.
            if (rdfa.start_anchored == DEAD_STATE) {
                DEBUG_PRINTF("all patterns pruned out\n");
                return nullptr;
            }

            nfa = getDfa(rdfa, cc, rm, accel_states);
            if (!nfa) {
                DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
                assert(0); /* able to build orig dfa but not the trimmed? */
                return nullptr;
            }
        }
    }

    assert(isDfaType(nfa->type));

    const bool oversized = nfa->length > cc.grey.limitSmallWriteOutfixSize
                        || nfa->length > cc.grey.limitDFASize;
    if (oversized) {
        DEBUG_PRINTF("smallwrite outfix size too large\n");
        return nullptr; /* this is just a soft failure - don't build smwr */
    }

    nfa->queueIndex = 0; /* dummy, small write API does not use queue */
    return nfa;
}
Ejemplo n.º 2
0
void FaRuleIr::writeHeader(std::ostream &os, std::string filename)
{
	FaGrammar *dfa = getDfa();
	if(dfa == NULL)
		return;		// nothing to do

	os << "// " << filename << " - automatically generated file (do not edit!)" << std::endl;
	if(!copyright.empty())
	os << "// " << copyright << std::endl;

	os << std::endl;
	os << "enum" << std::endl;
	os << "{" << std::endl;
	for(FaGrammar::iterator i = dfa->begin(); i != dfa->end(); ++i)
	{
		os << "\tRULE_" << prefix << *i->first << " = 0x" << std::hex << symTab->getNum(*i->first) << std::dec << "," << std::endl;
	}
	os << "\tRULE__" << prefix << "count = " << dfa->size() << std::endl;
	os << "};" << std::endl;
	
	os << std::endl;
	os << "enum" << std::endl;
	os << "{" << std::endl;
	for(std::set<std::string>::iterator i = tokens.begin(); i != tokens.end(); ++i)
	{
		os << "\tTOKEN_" << prefix << *i << " = 0x" << std::hex << symTab->getNum(*i) << std::dec << "," << std::endl;
	}
	os << "\tTOKEN__" << prefix << "count = " << tokens.size() << std::endl;
	os << "};" << std::endl;
	
	os << std::endl;
	os << "enum" << std::endl;
	os << "{" << std::endl;
	for(std::map<std::string, std::string>::iterator i = keywords.begin(); i != keywords.end(); ++i)
	{
		os << "\tKEYWORD_" << prefix << i->second << " = 0x" << std::hex << symTab->getNum(i->first) << std::dec << ",";
		os << "\t// " << i->first;
		os << std::endl;
	}
	os << "\tKEYWORD__" << prefix << "count = " << keywords.size() << std::endl;
	os << "};" << std::endl;

	os << std::endl;
}
Ejemplo n.º 3
0
// Rewrites a terminal's grammar symbol into the identifier used in generated
// code, based on its symbol-table number:
//   0x10000..0x1ffff  -> "TOKEN_" + prefix + symbol
//   0x20000..0x2ffff  -> "KEYWORD_" + prefix + keywords[symbol], with the
//                        original symbol text returned in `comment`
// Any other value leaves `token` and `comment` untouched.
// `keywords` is looked up with operator[], so a missing key is inserted with
// an empty name (this matches the lookup the method has always performed).
static void faParserDecorateTerminal(unsigned value, const std::string &prefix,
	std::map<std::string, std::string> &keywords,
	std::string &token, std::string &comment)
{
	if(value >= 0x10000 && value <= 0x1ffff)
	{
		token = std::string("TOKEN_") + prefix + token;
	}
	else
	if(value >= 0x20000 && value <= 0x2ffff)
	{
		comment = token;
		token = std::string("KEYWORD_") + prefix + keywords[token];
	}
}

// Emits one `case <token>:` label, followed by `// <comment>` when a comment
// is present.
static void faParserWriteCase(std::ostream &os, const std::string &token, const std::string &comment)
{
	os << "\t\t\tcase "	<< token << ":";
	if(!comment.empty())
		os << "\t// " << comment;
	os << std::endl;
}

// Writes the generated parser source to `os`.
//
// After a banner comment naming `filename` (and the copyright line, if any),
// one function `ParseNode *parse_<prefix>RULE_<rule>()` is emitted per grammar
// rule. Each function allocates a ParseNode and walks the rule's automaton
// with labels `L_<state>` and a `switch(getToken())` per state:
//   - an edge on a terminal becomes a `case` that calls addTerminal() and
//     nextToken();
//   - an edge on a rule becomes one `case` per start symbol reported by
//     `xlat`, followed by a call to that rule's parse function;
//   - `default:` returns the node in an accepting state and throws
//     ParseSyntaxError otherwise.
// The generated handler deletes the node and rethrows with a bare `throw;`,
// so the exception object in flight is propagated as-is instead of being
// copied (and possibly sliced) by `throw e;`.
// Nothing is written when getDfa() returns NULL.
void FaRuleIr::writeParser(std::ostream &os, std::string filename)
{
	FaGrammar *dfa = getDfa();
	if(dfa == NULL)
		return;		// nothing to do
	
	os << "// " << filename << " - automatically generated file (do not edit!)" << std::endl;
	if(!copyright.empty())
		os << "// " << copyright << std::endl;
	os << std::endl;
	
	for(FaGrammar::iterator i = dfa->begin(); i != dfa->end(); ++i)
	{
		os << "ParseNode *parse_" << prefix << "RULE_" << *i->first << "()" << std::endl;
		os << "{" << std::endl;
		os << "\tParseNode *node = new ParseNode(" << "RULE_" << prefix << *i->first << ", getLine(), getFile());" << std::endl;
		os << "\ttry" << std::endl;
		os << "\t{" << std::endl;
		
		FaNfa &fa = *i->second;
		os << "\t\tgoto L_" << fa.getStartState() << ";" << std::endl;
		
		for(unsigned j = 0; j < fa.getNumStates(); ++j)
		{
			FaState &st = fa.getState(j);
			os << "\t\tL_" << j << ":" << std::endl;
			os << "\t\tswitch(getToken())" << std::endl;
			os << "\t\t{" << std::endl;
			
			for(FaState::edge_iterator k = st.begin(); k != st.end(); ++k)
			{
				const std::string *symbol = k->getSymbol();
				unsigned target = k->getTarget();
				unsigned value = symTab->getNum(*symbol);
				
				// NOTE(review): the lower bound here is exclusive while the
				// token/keyword ranges are inclusive - confirm against the
				// symbol table whether 0x30000 itself can be a rule number.
				if(value > 0x30000 && value <= 0x3ffff)
				{
					// Edge on a rule: accept any of the rule's start symbols,
					// then descend into the rule's own parse function.
					os << "\t\t\t// begin rule " << *symbol << std::endl;
					for(FaNfaToDfa::start_sym_iterator h = xlat->start_sym_begin(symbol); h != xlat->start_sym_end(symbol); ++h)
					{
						const std::string *sym = *h;
						
						std::string startToken = *sym;
						std::string startComment;
						faParserDecorateTerminal(symTab->getNum(*sym), prefix, keywords, startToken, startComment);
						faParserWriteCase(os, startToken, startComment);
					}
					
					os << "\t\t\t// end rule " << *symbol << std::endl;
					os << "\t\t\t\taddNonterminal(node, parse_" << prefix << "RULE_" << *symbol <<"());" << std::endl;
				}
				else
				{
					// Edge on a terminal: consume it.
					std::string token = *symbol;
					std::string comment;
					faParserDecorateTerminal(value, prefix, keywords, token, comment);
					faParserWriteCase(os, token, comment);
					os << "\t\t\t\taddTerminal(node);" << std::endl;
					os << "\t\t\t\tnextToken();" << std::endl;
				}
				os << "\t\t\t\tgoto L_" << target << ";" << std::endl;
			}
			
			os << "\t\t\tdefault:" << std::endl;
			if(st.getAccept())
				os << "\t\t\t\treturn node;\t// accept state" << std::endl;
			else
				os << "\t\t\t\tthrow ParseSyntaxError();" << std::endl;
			os << "\t\t\t\tbreak;" << std::endl;
			os << "\t\t}" << std::endl;
		}
		
		os << "\t}" << std::endl;

		// Unnamed catch parameter + bare `throw;`: rethrows the original
		// exception object without copying it.
		os << "\tcatch(ParseSyntaxError &)" << std::endl;
		os << "\t{" << std::endl;
		os << "\t\tdelete node;" << std::endl;
		os << "\t\tthrow;" << std::endl;
		os << "\t}" << std::endl;
		os << "\treturn NULL;" << std::endl;
		os << "}" << std::endl;
		os << std::endl;
	}
	
	os << std::endl;
}
Ejemplo n.º 4
0
// Returns the flag stored in `table` (256 entries) for the first byte of
// `text`. The byte is converted through unsigned char so that values >= 0x80
// cannot produce a negative index. An empty string is treated as starting
// with NUL, which is what std::string::operator[] yields at index size().
static bool faLexerStartsManual(const bool *table, const std::string &text)
{
	const unsigned char first = text.empty() ? 0 : static_cast<unsigned char>(text[0]);
	return table[first];
}

// Spells `c` as the body of a C++ character literal (without the surrounding
// quotes). Backslash, single quote and control characters are escaped so the
// generated `case '...':` label always compiles; every other byte is returned
// unchanged.
static std::string faLexerCharLiteral(char c)
{
	switch(c)
	{
		case '\\': return "\\\\";
		case '\'': return "\\'";
		case '\n': return "\\n";
		case '\t': return "\\t";
		case '\r': return "\\r";
		case '\0': return "\\0";
		default: break;
	}
	
	const unsigned char uc = static_cast<unsigned char>(c);
	if(uc < 0x20 || uc == 0x7f)
	{
		// Remaining control characters: fixed-width three-digit octal escape.
		static const char digits[] = "01234567";
		std::string out = "\\";
		out += digits[(uc >> 6) & 7];
		out += digits[(uc >> 3) & 7];
		out += digits[uc & 7];
		return out;
	}
	return std::string(1, c);
}

// Writes the generated lexer source to `os`.
//
// `manualTokens` maps a name to a string of byte pairs; each pair (lo, hi)
// marks every byte in lo..hi as the start of a manually lexed token. A
// trailing unpaired byte is ignored.
//
// Output, in order:
//   - a banner comment naming `filename` (and the copyright line, if any);
//   - `initManualKeywords()`, registering every keyword whose first byte is a
//     manual start character;
//   - `lexNextToken()`, a goto-based state machine built from a FaLexerDfa
//     that holds the single-character tokens and the remaining keywords.
// A warning is printed to std::cout when a single-character token is also a
// manual start character.
// Nothing is written when getDfa() returns NULL.
void FaRuleIr::writeLexer(std::ostream &os, std::string filename, std::map<std::string,std::string> &manualTokens
)
{
	FaGrammar *dfa = getDfa();
	if(dfa == NULL)
		return;		// nothing to do
	os << "// " << filename << " - automatically generated file (do not edit!)" << std::endl;
	if(!copyright.empty())
		os << "// " << copyright << std::endl;
	os << std::endl;
	
	// startChars[b] is true when byte b begins a manually lexed token.
	bool startChars[256] = { false };
	for(std::map<std::string,std::string>::iterator i = manualTokens.begin(); i != manualTokens.end(); ++i)
	{
		const std::string &ranges = i->second;
		for(std::string::size_type j = 0; j + 1 < ranges.size(); j += 2)
		{
			const unsigned lo = static_cast<unsigned char>(ranges[j]);
			const unsigned hi = static_cast<unsigned char>(ranges[j + 1]);
			// The counter is wider than a byte so the loop terminates even
			// when hi == 0xFF.
			for(unsigned c = lo; c <= hi; ++c)
				startChars[c] = true;
		}
	}
	
	std::set<std::string> manualKeywords;	// unquoted
	for(std::map<std::string, std::string>::iterator i = keywords.begin(); i != keywords.end(); ++i)
	{
		std::string keyword = unquote(i->first);
		if(faLexerStartsManual(startChars, keyword))
			manualKeywords.insert(keyword);
	}
	
	os << "virtual void initManualKeywords()" << std::endl;
	os << "{" << std::endl;
	for(std::set<std::string>::iterator i = manualKeywords.begin(); i != manualKeywords.end(); ++i)
	{
		// `keywords` is keyed by the quoted form of the keyword.
		std::string s = std::string("\"") + *i + std::string("\"");
		os << "\taddManualKeyword(\"" << *i << "\", KEYWORD_" << prefix << keywords[s] << ");" << std::endl;
	}
	os << "}" << std::endl;
	
	// Named lexTokens so it does not shadow the member `tokens`.
	std::map<std::string, std::string> lexTokens;	// text -> C++ match
	for(FaSymbolTable::sym_tab_iterator i = symTab->begin(); i != symTab->end(); ++i)
	{
		std::string symbol = i->first;
		if(symbol.empty())
			continue;
		
		// Match single-character tokens.
		if(symbol[0] == '\'' && symbol.size() == 3)
		{
			char c = symbol[1];
			std::string s = "";
			s += c;
			if(startChars[static_cast<unsigned char>(c)])
			{
				std::cout << "warning: character \'" << c << "\' conflicts with a manual keyword!" << std::endl;
			}
			lexTokens[s] = symbol;
		}
		else
		if(symbol[0] == '\"' && symbol.size() >= 3)
		{
			std::string s = unquote(symbol);
			std::string t = std::string("KEYWORD_") + prefix + keywords[symbol];
			// Keywords starting with a manual character are handled by
			// initManualKeywords(), so they stay out of the DFA.
			if(!faLexerStartsManual(startChars, s))
			{
				lexTokens[s] = t;
			}
		}
	}
	
	// Note: we need to make an NFA, then cvt to DFA.
	// Accept states may need ATTRIBUTES such as KEYWORD_rcISDEFAS.
	// We then know what to return if we get there, but is this theoretically sound?
	FaLexerDfa lexDfa;
	for(std::map<std::string, std::string>::iterator i = lexTokens.begin(); i != lexTokens.end(); ++i)
	{
		lexDfa.add(i->first, i->second);
	}
	
	os << std::endl;
	os << "unsigned lexNextToken()" << std::endl;
	os << "{" << std::endl;
	os << "\tgoto L0;" << std::endl;
	
	for(unsigned state = 0; state < lexDfa.states.size(); ++state)
	{
		os << "\tL" << state << ":" << std::endl;
		os << "\tswitch(getChar())" << std::endl;
		os << "\t{" << std::endl;
		
		FaLexerDfaState &st = lexDfa.states[state];
		for(std::map<char, UINT>::iterator i = st.edges.begin(); i != st.edges.end(); ++i)
		{
			// Escape the edge character so quotes, backslashes and control
			// characters still form a valid character literal.
			os << "\t\tcase \'" << faLexerCharLiteral(i->first) << "\':" << std::endl;
			os << "\t\t\tnextChar();" << std::endl;
			os << "\t\t\tgoto L" << i->second << ";" << std::endl;
		}

		os << "\t\tdefault:" << std::endl;
		if(state == 0)
		{
			// this is the start state. can't be an accept state, then.
			os << "\t\t\treturn defaultHandler(0x80000000 + getChar());\t//start state" << std::endl;
		}
		else
		if(st.match.empty())
		{
			os << "\t\t\treturn 0x80000000 + getChar();\t// unexpected character" << std::endl;
		}
		else
		{
			os << "\t\t\treturn " << st.match << ";\t// accept state" << std::endl;
		}
		os << "\t\t\tbreak;" << std::endl;
		os << "\t}" << std::endl;
	}
	
	os << "\treturn 0;" << std::endl;
	os << "}" << std::endl;
	
	os << std::endl;
}