const HParser *init_parser(void) { // CORE H_RULE (digit, h_ch_range(0x30, 0x39)); H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); // AUX. H_RULE (plus, h_ch('+')); H_RULE (slash, h_ch('/')); H_ARULE(equals, h_ch('=')); H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); H_ARULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); H_ARULE(ws, h_many(space)); H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); // BUG sometimes inputs that should just don't parse. // It *seemed* to happen mostly with things like "bbbbaaaaBA==". // Using less actions seemed to make it less likely. return document; }
static void test_bug118(void) { // https://github.com/UpstandingHackers/hammer/issues/118 // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 HParseResult* p; const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; #define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); #undef MY_ENDIAN H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); p = h_parse(parser_weird, input, 6); g_check_cmp_int32(p->bit_length, ==, 44); h_parse_result_free(p); p = h_parse(parser_ok, input, 6); g_check_cmp_int32(p->bit_length, ==, 40); h_parse_result_free(p); }
void init_parser(void) { // CORE HParser *digit = h_ch_range(0x30, 0x39); HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. HParser *plus = h_ch('+'); HParser *slash = h_ch('/'); HParser *equals = h_ch('='); HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); HParser *base64_3 = h_repeat_n(bsfdig, 4); HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); HParser *base64 = h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL); document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); }