const HParser *init_parser(void) { // CORE H_RULE (digit, h_ch_range(0x30, 0x39)); H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); // AUX. H_RULE (plus, h_ch('+')); H_RULE (slash, h_ch('/')); H_ARULE(equals, h_ch('=')); H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); H_ARULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); H_ARULE(ws, h_many(space)); H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); // BUG sometimes inputs that should just don't parse. // It *seemed* to happen mostly with things like "bbbbaaaaBA==". // Using less actions seemed to make it less likely. return document; }
/*
 * Regression test for the LL(k) backend: the end of the input must not be
 * mistaken for a zero character.
 *
 * Exercises a parser for a single zero byte ("z"), alone and combined with a
 * leading 'a' and/or a trailing end-of-input check.
 */
static void test_llk_zero_end(void) {
    HParserBackend be = PB_LLk;
    HParser *z = h_ch('\x00');
    HParser *az = h_sequence(h_ch('a'), z, NULL);
    HParser *ze = h_sequence(z, h_end_p(), NULL);
    HParser *aze = h_sequence(h_ch('a'), z, h_end_p(), NULL);

    // NOTE: inputs that place 'b' after the zero byte are written as two
    // adjacent string literals.  A hex escape consumes every following hex
    // digit, so "\x00b" would be the single byte 0x0B rather than a zero byte
    // followed by 'b'.

    // some cases surrounding the bug
    g_check_parse_match (z, be, "\x00", 1, "u0");
    g_check_parse_failed(z, be, "", 0);
    g_check_parse_match (ze, be, "\x00", 1, "(u0)");
    g_check_parse_failed(ze, be, "\x00" "b", 2);
    g_check_parse_failed(ze, be, "", 0);
    g_check_parse_match (az, be, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_match (aze, be, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_failed(aze, be, "a\x00" "b", 3);

    // the following should not parse but did when the LL(k) backend failed to
    // check for the end of input, mistaking it for a zero character.
    g_check_parse_failed(az, be, "a", 1);
    g_check_parse_failed(aze, be, "a", 1);
}
void init_parser(void) { // CORE HParser *digit = h_ch_range(0x30, 0x39); HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. HParser *plus = h_ch('+'); HParser *slash = h_ch('/'); HParser *equals = h_ch('='); HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); HParser *base64_3 = h_repeat_n(bsfdig, 4); HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); HParser *base64 = h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL); document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); }
const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; static int inited = 0; if (type >= sizeof(parsers)) return NULL; if (inited) return parsers[type]; H_RULE (domain, init_domain()); H_ARULE(cstr, init_character_string()); H_RULE (a, h_uint32()); H_RULE (ns, domain); H_RULE (md, domain); H_RULE (mf, domain); H_RULE (cname, domain); H_ARULE(soa, h_sequence(domain, // MNAME domain, // RNAME h_uint32(), // SERIAL h_uint32(), // REFRESH h_uint32(), // RETRY h_uint32(), // EXPIRE h_uint32(), // MINIMUM NULL)); H_RULE (mb, domain); H_RULE (mg, domain); H_RULE (mr, domain); H_VRULE(null, h_many(h_uint8())); H_RULE (wks, h_sequence(h_uint32(), h_uint8(), h_many(h_uint8()), NULL)); H_RULE (ptr, domain); H_RULE (hinfo, h_sequence(cstr, cstr, NULL)); H_RULE (minfo, h_sequence(domain, domain, NULL)); H_RULE (mx, h_sequence(h_uint16(), domain, NULL)); H_ARULE(txt, h_many1(cstr)); parsers[ 0] = NULL; // there is no type 0 parsers[ 1] = a; parsers[ 2] = ns; parsers[ 3] = md; parsers[ 4] = mf; parsers[ 5] = cname; parsers[ 6] = soa; parsers[ 7] = mb; parsers[ 8] = mg; parsers[ 9] = mr; parsers[10] = null; parsers[11] = wks; parsers[12] = ptr; parsers[13] = hinfo; parsers[14] = minfo; parsers[15] = mx; parsers[16] = txt; // All parsers must consume their input exactly. for(uint16_t i; i<sizeof(parsers); i++) { if(parsers[i]) { parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL), act_index0); } } inited = 1; return parsers[type]; }