Example #1
0
//Reverses the '-'-based escaping of a BML value.
//A value that does not start with '-' is returned unchanged. Otherwise the leading '-' is dropped and,
// in the remainder, "--" yields a literal '-' and '-' followed by a successfully parsed hex byte yields that byte.
//A '-' followed by anything else is discarded on its own; the characters after it are then processed normally.
string bmlwriter::unescape(cstring val)
{
	if (!val.startswith("-")) return val;
	
	string decoded;
	size_t pos = 1; // index 0 is the escape marker
	while (pos < val.length())
	{
		if (val[pos] != '-')
		{
			//ordinary character, copied verbatim
			decoded += val[pos];
			pos++;
			continue;
		}
		
		if (val[pos+1] == '-')
		{
			//doubled dash
			decoded += '-';
			pos += 2;
			continue;
		}
		
		//presumably substr takes (start, end), making this the two characters after the dash - TODO confirm
		byte hexbyte;
		if (fromstringhex(val.substr(pos+1, pos+3), hexbyte))
		{
			decoded += hexbyte;
			pos += 3;
		}
		else
		{
			//unparseable escape: only the dash itself is skipped
			pos++;
		}
	}
	return decoded;
}
// Expands every macro reference found in 'where'.
// Text outside references is copied into m_post_subst_line; each reference (delimited by
// m_macro_ref_begin / m_macro_ref_end) is replaced with the result of get_macro_value().
// If anything was accumulated, 'where' is re-pointed at m_post_subst_line; otherwise it is left as is.
// A reference without a closing delimiter is reported through BOOST_RT_PARAM_VALIDATE_LOGIC.
void
config_file_iterator::Impl::substitute_macros( cstring& where )
{
    m_post_subst_line.clear();

    for( ;; ) {
        cstring::size_type ref_pos = where.find( m_macro_ref_begin );
        if( ref_pos == cstring::npos )
            break;

        // keep the literal text in front of the reference, then drop it and the opening delimiter
        m_post_subst_line.append( where.begin(), ref_pos );
        where.trim_left( where.begin() + ref_pos + m_macro_ref_begin.size() );

        // 'where' now starts at the macro name; look for the closing delimiter
        ref_pos = where.find( m_macro_ref_end );

        BOOST_RT_PARAM_VALIDATE_LOGIC( ref_pos != cstring::npos, BOOST_RT_PARAM_LITERAL( "incomplete macro reference" ) );

        cstring macro_value = *get_macro_value( where.substr( 0, ref_pos ), false );
        m_post_subst_line.append( macro_value.begin(), macro_value.size() );

        // continue scanning after the closing delimiter
        where.trim_left( where.begin() + ref_pos + m_macro_ref_end.size() );
    }

    if( m_post_subst_line.empty() )
        return;

    // append the tail that follows the last reference and hand back the substituted line
    m_post_subst_line.append( where.begin(), where.size() );
    where = m_post_subst_line;
}
Example #3
0
//Parses 's' as a run of hex digit pairs, one pair per element of 'val'.
//Returns false without touching 'val' if the length of 's' is not exactly twice the size of 'val'.
//Otherwise every pair is handed to the single-byte overload, even after an earlier pair has failed,
// and the result is true only if all of them succeeded.
bool fromstringhex(cstring s, arrayvieww<byte> val)
{
	if (s.length() != val.size()*2) return false;
	
	bool allparsed = true;
	for (size_t idx=0; idx<val.size(); idx++)
	{
		const size_t pairstart = idx*2;
		if (!fromstringhex(s.substr(pairstart, pairstart+2), val[idx]))
		{
			allparsed = false;
		}
	}
	return allparsed;
}
Example #4
0
cstring ReferenceMap::newName(cstring base) {
    // Names are made unique across the whole map (usedNames); there is no
    // per-scope tracking here.

    // Strip what looks like a previously generated suffix: trailing decimal
    // digits preceded by '_'.  Note that a bare trailing '_' (no digits after
    // it) is stripped as well, and that digits without a preceding '_' are
    // kept.  Dropping the suffix only changes what the new name looks like.
    const char* text = base.c_str();
    unsigned keep = base.size();
    for (; keep > 0; --keep) {
        // Stop at the last character that is not a decimal digit.
        if (!strchr("0123456789", text[keep - 1]))
            break;
    }
    if (keep > 0 && base[keep - 1] == '_')
        base = base.substr(0, keep - 1);

    // Derive a name not yet in usedNames and record it so later calls avoid it.
    cstring fresh = cstring::make_unique(usedNames, base, '_');
    usedNames.insert(fresh);
    return fresh;
}
 static bool         match_back( cstring str, cstring pattern )
 {
     // A pattern longer than the string cannot be its suffix.
     if( pattern.size() > str.size() )
         return false;

     // Compare the tail of str, starting pattern.size() characters before its end.
     return str.substr( str.size() - pattern.size() ) == pattern;
 }
 static bool         match_front( cstring str, cstring pattern )
 {
     // A pattern longer than the string cannot be its prefix.
     if( pattern.size() > str.size() )
         return false;

     // Compare the leading part of str against the pattern.
     return str.substr( 0, pattern.size() ) == pattern;
 }
Example #7
0
//Decodes one character reference at the front of 'in': the decoded text is appended to 'out',
// and 'in' is advanced past the characters that were consumed.
//in[0] is never inspected; presumably the caller has already checked that it is '&' - TODO confirm.
//isattr enables the historical attribute-only rule in the named reference branch below.
//If nothing can be decoded, a literal '&' is appended to 'out' and exactly one character of 'in' is consumed.
void HTML::entity_decode(string& out, cstring& in, bool isattr)
{
	//this follows the HTML5 spec <https://html.spec.whatwg.org/#character-reference-state>
	//12.2.5.72 Character reference state
	if (isalnum(in[1]))
	{
		//12.2.5.73 Named character reference state
		//judging by the pointer arithmetic below, find_entity returns the matched name as a NUL-terminated string
		// with the replacement text stored right after the terminator - verify against find_entity
		const char * ent = find_entity(in.substr(1, ~0));
		if (!ent) goto fail;
		size_t entlen = strlen(ent);
		size_t entlen_cons = 1+entlen; // characters to consume from 'in', counting in[0]
		if (ent[entlen-1]!=';')
		{
			//the matched name has no trailing semicolon; swallow one from the input if it is there
			if (in[entlen_cons]==';') entlen_cons++;
			else
			{
				if (isattr &&                    // If the character reference was consumed as part of an attribute,
				    /* checked above */          // and the last character matched is not a U+003B SEMICOLON character (;),
				    (in[entlen_cons]=='=' ||     // and the next input character is either a U+003D EQUALS SIGN character (=) 
				      isalnum(in[entlen_cons]))) // or an ASCII alphanumeric,
				{                                // then, for historical reasons,
					goto fail;                     // flush code points consumed as a character reference and switch to the return state
				}
			}
		}
		in = in.substr(entlen_cons, ~0);
		const char * entval = (ent+entlen+1); // skip the name and its NUL terminator
		//a leading '?' in the replacement text is not emitted; its meaning comes from the entity table, which is not visible here
		if (*entval == '?') entval++;
		out += entval;
		return;
	}
	else if (in[1]=='#')
	{
		//12.2.5.75 Numeric character reference state
		size_t ccode;
		if (in[2]=='x' || in[2]=='X')
		{
			//12.2.5.76 Hexadecimal character reference start state
			size_t n = 3; // first digit follows "&#x"
			while (isxdigit(in[n])) n++;
			if (n==3) goto fail; // no digits at all
			//the digits were already validated above, so a parse failure here is presumably an overflow
			if (!fromstringhex(in.substr(3, n), ccode)) ccode = 0xFFFD;
			if (in[n]==';') n++; // the terminating semicolon is optional
			in = in.substr(n, ~0);
		}
		else
		{
			//12.2.5.77 Decimal character reference start state
			size_t n = 2; // first digit follows "&#"
			while (isdigit(in[n])) n++;
			if (n==2) goto fail; // no digits at all
			//the digits were already validated above, so a parse failure here is presumably an overflow
			if (!fromstring(in.substr(2, n), ccode)) ccode = 0xFFFD;
			if (in[n]==';') n++; // the terminating semicolon is optional
			in = in.substr(n, ~0);
		}
		//12.2.5.80 Numeric character reference end state
		//NUL, values above the Unicode range and surrogates all become U+FFFD
		if (ccode == 0x00) ccode = 0xFFFD;
		if (ccode >  0x10FFFF) ccode = 0xFFFD;
		if (ccode >= 0xD800 && ccode <= 0xDFFF) ccode = 0xFFFD;
		if (ccode >= 0x80 && ccode <= 0x9F)
		{
			//0x80-0x9F are remapped through this table; X marks entries without a replacement
#define X 0xFFFD
static const uint16_t windows1252[32]={
	//00 to 7F map to themselves
	0x20AC, X,      0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, X,      0x017D, X,     
	X,      0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, X,      0x017E, 0x0178,
	//A0 to FF map to themselves
};
#undef X
			uint32_t newcp = windows1252[ccode-0x80]; // yes, this is in the spec
			//entries marked X leave ccode unchanged rather than turning it into U+FFFD
			if (newcp != 0xFFFD) ccode = newcp;
		}
		out += string::codepoint(ccode);
		return;
	}
	//else fall through
	
fail:
	//not a character reference: emit the first character as a literal '&' and consume only that
	out += '&';
	in = in.substr(1, ~0);
}
Example #8
0
//takes a single line, returns the first node in it
//hasvalue is to differentiate 'foo' from 'foo='; only the former allows a multi-line value
//returns true if found a node, false for error or nothing
//if error, 'value' is the error message; if line was blank, 'value' is ""
//on success, 'node' is the node name and 'data' is advanced past what was consumed
//the code relies on data[data.length()] reading as '\0'
static bool bml_parse_inline_node(cstring& data, cstring& node, bool& hasvalue, cstring& value)
{
	//skip leading whitespace; a line consisting only of whitespace is the "nothing" case
	size_t nodestart = bml_size_white(data);
	if (nodestart == data.length())
	{
		data = "";
		value = "";
		return false;
	}
	
	//node names consist of alphanumerics, '-' and '.'
	size_t nodelen = nodestart; // despite the name, this is the index one past the end of the node name
	while (isalnum(data[nodelen]) || data[nodelen]=='-' || data[nodelen]=='.') nodelen++;
	if (nodestart == nodelen)
	{
		value = "Invalid node name";
		//discard everything up to the next newline or the end of the data
		while (data[nodelen]!='\n' && data[nodelen]!='\0') nodelen++;
		data = data.substr(nodelen, ~0);
		return false;
	}
	//extract the name; 'data' now starts at the character right after it
	node = cut(data, nodestart, nodelen, 0);
	switch (data[0])
	{
		case '\0':
		case '\t':
		case ' ':
		{
			//bare node name, no inline value
			hasvalue = false;
			return true;
		}
		case ':':
		{
			//'node: value' - the value is the entire rest of the line, minus leading spaces and tabs
			hasvalue = true;
			int valstart = 1;
			while (data[valstart]==' ' || data[valstart]=='\t') valstart++;
			value = data.substr(valstart, ~0);
			data = "";
			return true;
		}
		case '=':
		{
			if (data[1]=='"')
			{
				//'node="value"' - the value runs up to the next double quote
				hasvalue = true;
				int valend = 2;
				while (data[valend]!='"' && data[valend]!='\0') valend++;
				//the closing quote must exist and be followed by space, tab or end of data
				//(strchr also matches the terminating NUL of " \t", which covers the end-of-data case)
				if (data[valend]!='"' || !strchr(" \t", data[valend+1]))
				{
					while (data[valend]!='\0') valend++;
					data = data.substr(valend, ~0);
					value = "Broken quoted value";
					return false;
				}
				//take the text between the quotes and skip the closing quote
				value = cut(data, 2, valend, 1);
				return true;
			}
			else
			{
				//'node=value' - the value runs up to the next space or the end of data
				//NOTE(review): unlike the other branches, a tab does not end the value here - confirm this is intended
				hasvalue = true;
				int valend = 0;
				while (data[valend]!=' ' && data[valend]!='"' && data[valend]!='\0') valend++;
				//a double quote inside an unquoted value is an error
				if (data[valend]=='"')
				{
					while (data[valend]!='\0') valend++;
					data = data.substr(valend, ~0);
					value = "Broken quoted value";
					return false;
				}
				//take the text after the '='
				value = cut(data, 1, valend, 0);
				return true;
			}
		}
		default:
			//NOTE(review): 'data' is not advanced on this path, unlike the other error paths - confirm callers cope with that
			value = "Invalid node suffix";
			return false;
	}
}
Example #9
0
//Splits a piece off the front of 'input'.
//Returns input.substr(skipstart, cut) and then shrinks 'input' so that it begins at index cut+skipafter.
//Presumably substr takes (start, end), so the returned piece is [skipstart, cut) and skipafter is the number of
// characters dropped after it - TODO confirm against cstring::substr.
static cstring cut(cstring& input, int skipstart, int cut, int skipafter)
{
	const int resume = cut+skipafter;
	cstring piece = input.substr(skipstart, cut);
	input = input.substr(resume, ~0);
	return piece;
}