const HParser *init_parser(void) { // CORE H_RULE (digit, h_ch_range(0x30, 0x39)); H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); // AUX. H_RULE (plus, h_ch('+')); H_RULE (slash, h_ch('/')); H_ARULE(equals, h_ch('=')); H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); H_ARULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); H_ARULE(ws, h_many(space)); H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); // BUG sometimes inputs that should just don't parse. // It *seemed* to happen mostly with things like "bbbbaaaaBA==". // Using less actions seemed to make it less likely. return document; }
void init_parser(void) { // CORE const HParser *digit = h_ch_range(0x30, 0x39); const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. const HParser *plus = h_ch('+'); const HParser *slash = h_ch('/'); const HParser *equals = h_ch('='); const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); const HParser *bsfdig_4bit = h_choice( h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'), h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'), h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL); const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL); const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); const HParser *base64 = h_choice(base64_2, base64_1, NULL); // why does this parse "A=="?! // why does this parse "aaA=" but not "aA=="?! document = base64; }
const HParser* init_domain() { static const HParser *domain = NULL; if (domain) return domain; const HParser *letter = h_choice(h_ch_range('a', 'z'), h_ch_range('A', 'Z'), NULL); const HParser *let_dig = h_choice(letter, h_ch_range('0', '9'), NULL); const HParser *ldh_str = h_many1(h_choice(let_dig, h_ch('-'), NULL)); const HParser *label = h_attr_bool(h_sequence(letter, h_optional(h_sequence(h_optional(ldh_str), let_dig, NULL)), NULL), validate_label); /** * You could write it like this ... * HParser *indirect_subdomain = h_indirect(); * const HParser *subdomain = h_choice(label, * h_sequence(indirect_subdomain, * h_ch('.'), * label, * NULL), * NULL); * h_bind_indirect(indirect_subdomain, subdomain); * * ... but this is easier and equivalent */ const HParser *subdomain = h_sepBy1(label, h_ch('.')); domain = h_choice(subdomain, h_ch(' '), NULL); return domain; }
HParser* init_domain() { static HParser *ret = NULL; if (ret) return ret; H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); H_VARULE(label, h_sequence(letter, h_optional(h_sequence(h_optional(ldh_str), let_dig, NULL)), NULL)); H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL)); ret = domain; return ret; }
void init_parser(void) { // CORE HParser *digit = h_ch_range(0x30, 0x39); HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. HParser *plus = h_ch('+'); HParser *slash = h_ch('/'); HParser *equals = h_ch('='); HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); HParser *base64_3 = h_repeat_n(bsfdig, 4); HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); HParser *base64 = h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL); document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); }