// Undoes the '-' based escaping of a value.
// A value that does not start with "-" is returned as is. Otherwise the leading '-' is discarded and
// the remainder is decoded: "--" yields a literal '-', '-' followed by two hex digits yields that byte,
// and a '-' followed by anything else is dropped. All other characters are copied through.
string bmlwriter::unescape(cstring val)
{
    if (!val.startswith("-")) return val;

    string decoded;
    size_t pos = 1; // index 0 is the marker that says the value is escaped
    while (pos < val.length())
    {
        if (val[pos] != '-')
        {
            decoded += val[pos];
            pos++;
            continue;
        }

        if (val[pos+1] == '-')
        {
            decoded += '-';
            pos += 2;
            continue;
        }

        byte raw;
        if (fromstringhex(val.substr(pos+1, pos+3), raw))
        {
            decoded += raw;
            pos += 3;
        }
        else
        {
            // not a valid escape; the '-' itself is skipped and decoding resumes at the next character
            pos++;
        }
    }
    return decoded;
}
// Expands every macro reference found in 'where'.
// The expanded text is assembled in m_post_subst_line; if that buffer ends up non-empty,
// 'where' is rebound to it. Otherwise 'where' keeps whatever is left after trimming.
// A reference that is opened but never closed is reported through BOOST_RT_PARAM_VALIDATE_LOGIC.
void
config_file_iterator::Impl::substitute_macros( cstring& where )
{
    m_post_subst_line.clear();

    for( ;; ) {
        cstring::size_type ref_open = where.find( m_macro_ref_begin );
        if( ref_open == cstring::npos )
            break;

        // keep the text in front of the reference, then drop it together with the opening delimiter
        m_post_subst_line.append( where.begin(), ref_open );
        where.trim_left( where.begin() + ref_open + m_macro_ref_begin.size() );

        cstring::size_type ref_close = where.find( m_macro_ref_end );

        BOOST_RT_PARAM_VALIDATE_LOGIC( ref_close != cstring::npos, BOOST_RT_PARAM_LITERAL( "incomplete macro reference" ) );

        // 'where' now starts at the macro name, which runs up to the closing delimiter
        cstring value = *get_macro_value( where.substr( 0, ref_close ), false );
        m_post_subst_line.append( value.begin(), value.size() );

        // continue scanning after the closing delimiter
        where.trim_left( where.begin() + ref_close + m_macro_ref_end.size() );
    }

    if( m_post_subst_line.empty() )
        return;

    // append the tail that follows the last reference and hand the assembled line back
    m_post_subst_line.append( where.begin(), where.size() );
    where = m_post_subst_line;
}
// Decodes a hex string into a byte buffer, two characters of 's' per element of 'val'.
// Returns false without touching 'val' if 's' is not exactly twice as long as 'val'.
// Otherwise every character pair is handed to the single-byte overload, even after one of them
// has failed, and the result is true only if all of them succeeded.
bool fromstringhex(cstring s, arrayvieww<byte> val)
{
    if (val.size()*2 != s.length()) return false;

    bool all_ok = true;
    for (size_t n=0; n<val.size(); n++)
    {
        size_t at = n*2;
        if (!fromstringhex(s.substr(at, at+2), val[n])) all_ok = false;
    }
    return all_ok;
}
// Produces a name derived from 'base' that is not yet in usedNames, and records it there.
cstring ReferenceMap::newName(cstring base) {
    // Identifiers are made globally unique; per-scope bookkeeping may come later.

    // A trailing run of digits preceded by '_' is assumed to be a previously generated
    // suffix and is removed before a new one is chosen. (A bare trailing '_' with no digits
    // after it is removed as well.) This does not affect correctness.
    const char digits[] = "0123456789";
    const char* text = base.c_str();
    unsigned keep = base.size();
    for (; keep > 0; --keep) {
        if (!strchr(digits, text[keep - 1]))
            break;
    }
    if (keep > 0 && base[keep - 1] == '_')
        base = base.substr(0, keep - 1);

    cstring fresh = cstring::make_unique(usedNames, base, '_');
    usedNames.insert(fresh);
    return fresh;
}
// Returns true if 'str' ends with 'pattern'.
static bool
match_back( cstring str, cstring pattern )
{
    if( str.size() < pattern.size() )
        return false;

    // compare the last pattern.size() characters of str
    return str.substr( str.size() - pattern.size() ) == pattern;
}
// Returns true if 'str' begins with 'pattern'.
static bool
match_front( cstring str, cstring pattern )
{
    if( str.size() < pattern.size() )
        return false;

    // compare the first pattern.size() characters of str
    return str.substr( 0, pattern.size() ) == pattern;
}
void HTML::entity_decode(string& out, cstring& in, bool isattr) { //this follows the HTML5 spec <https://html.spec.whatwg.org/#character-reference-state> //12.2.5.72 Character reference state if (isalnum(in[1])) { //12.2.5.73 Named character reference state const char * ent = find_entity(in.substr(1, ~0)); if (!ent) goto fail; size_t entlen = strlen(ent); size_t entlen_cons = 1+entlen; if (ent[entlen-1]!=';') { if (in[entlen_cons]==';') entlen_cons++; else { if (isattr && // If the character reference was consumed as part of an attribute, /* checked above */ // and the last character matched is not a U+003B SEMICOLON character (;), (in[entlen_cons]=='=' || // and the next input character is either a U+003D EQUALS SIGN character (=) isalnum(in[entlen_cons]))) // or an ASCII alphanumeric, { // then, for historical reasons, goto fail; // flush code points consumed as a character reference and switch to the return state } } } in = in.substr(entlen_cons, ~0); const char * entval = (ent+entlen+1); if (*entval == '?') entval++; out += entval; return; } else if (in[1]=='#') { //12.2.5.75 Numeric character reference state size_t ccode; if (in[2]=='x' || in[2]=='X') { //12.2.5.76 Hexademical character reference start state size_t n = 3; while (isxdigit(in[n])) n++; if (n==3) goto fail; if (!fromstringhex(in.substr(3, n), ccode)) ccode = 0xFFFD; if (in[n]==';') n++; in = in.substr(n, ~0); } else { //12.2.5.77 Decimal character reference start state size_t n = 2; while (isdigit(in[n])) n++; if (n==2) goto fail; if (!fromstring(in.substr(2, n), ccode)) ccode = 0xFFFD; if (in[n]==';') n++; in = in.substr(n, ~0); } //12.2.5.80 Numeric character reference end state if (ccode == 0x00) ccode = 0xFFFD; if (ccode > 0x10FFFF) ccode = 0xFFFD; if (ccode >= 0xD800 && ccode <= 0xDFFF) ccode = 0xFFFD; if (ccode >= 0x80 && ccode <= 0x9F) { #define X 0xFFFD static const uint16_t windows1252[32]={ //00 to 7F map to themselves 0x20AC, X, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 
0x2030, 0x0160, 0x2039, 0x0152, X, 0x017D, X, X, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, X, 0x017E, 0x0178, //A0 to FF map to themselves }; #undef X uint32_t newcp = windows1252[ccode-0x80]; // yes, this is in the spec if (newcp != 0xFFFD) ccode = newcp; } out += string::codepoint(ccode); return; } //else fall through fail: out += '&'; in = in.substr(1, ~0); }
//takes a single line, returns the first node in it //hasvalue is to differentiate 'foo' from 'foo='; only the former allows a multi-line value //returns true if found a node, false for error or nothing //if error, 'value' is the error message; if line was blank, 'value' is "" static bool bml_parse_inline_node(cstring& data, cstring& node, bool& hasvalue, cstring& value) { size_t nodestart = bml_size_white(data); if (nodestart == data.length()) { data = ""; value = ""; return false; } size_t nodelen = nodestart; while (isalnum(data[nodelen]) || data[nodelen]=='-' || data[nodelen]=='.') nodelen++; if (nodestart == nodelen) { value = "Invalid node name"; while (data[nodelen]!='\n' && data[nodelen]!='\0') nodelen++; data = data.substr(nodelen, ~0); return false; } node = cut(data, nodestart, nodelen, 0); switch (data[0]) { case '\0': case '\t': case ' ': { hasvalue = false; return true; } case ':': { hasvalue = true; int valstart = 1; while (data[valstart]==' ' || data[valstart]=='\t') valstart++; value = data.substr(valstart, ~0); data = ""; return true; } case '=': { if (data[1]=='"') { hasvalue = true; int valend = 2; while (data[valend]!='"' && data[valend]!='\0') valend++; if (data[valend]!='"' || !strchr(" \t", data[valend+1])) { while (data[valend]!='\0') valend++; data = data.substr(valend, ~0); value = "Broken quoted value"; return false; } value = cut(data, 2, valend, 1); return true; } else { hasvalue = true; int valend = 0; while (data[valend]!=' ' && data[valend]!='"' && data[valend]!='\0') valend++; if (data[valend]=='"') { while (data[valend]!='\0') valend++; data = data.substr(valend, ~0); value = "Broken quoted value"; return false; } value = cut(data, 1, valend, 0); return true; } } default: value = "Invalid node suffix"; return false; } }
// Splits a piece off the front of 'input'.
// Returns input.substr(skipstart, cut) and replaces 'input' with everything from cut+skipafter onwards.
static cstring cut(cstring& input, int skipstart, int cut, int skipafter)
{
    const int resume = cut+skipafter;
    cstring piece = input.substr(skipstart, cut);
    input = input.substr(resume, ~0);
    return piece;
}