void init_parser(void) { // CORE const HParser *digit = h_ch_range(0x30, 0x39); const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. const HParser *plus = h_ch('+'); const HParser *slash = h_ch('/'); const HParser *equals = h_ch('='); const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); const HParser *bsfdig_4bit = h_choice( h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'), h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'), h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL); const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL); const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); const HParser *base64 = h_choice(base64_2, base64_1, NULL); // why does this parse "A=="?! // why does this parse "aaA=" but not "aA=="?! document = base64; }
const HParser *init_parser(void) { // CORE H_RULE (digit, h_ch_range(0x30, 0x39)); H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); // AUX. H_RULE (plus, h_ch('+')); H_RULE (slash, h_ch('/')); H_ARULE(equals, h_ch('=')); H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); H_ARULE(base64, h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL)); H_ARULE(ws, h_many(space)); H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); // BUG sometimes inputs that should just don't parse. // It *seemed* to happen mostly with things like "bbbbaaaaBA==". // Using less actions seemed to make it less likely. return document; }
static void test_bug118(void) { // https://github.com/UpstandingHackers/hammer/issues/118 // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 HParseResult* p; const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; #define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); #undef MY_ENDIAN H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); p = h_parse(parser_weird, input, 6); g_check_cmp_int32(p->bit_length, ==, 44); h_parse_result_free(p); p = h_parse(parser_ok, input, 6); g_check_cmp_int32(p->bit_length, ==, 40); h_parse_result_free(p); }
// dummy! int test_lalr(void) { HAllocator *mm__ = &system_allocator; /* E -> E '-' T | T T -> '(' E ')' | 'n' -- also try [0-9] for the charset paths */ HParser *n = h_ch('n'); HParser *E = h_indirect(); HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); h_bind_indirect(E, E_); HParser *p = E; printf("\n==== G R A M M A R ====\n"); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); if (g == NULL) { fprintf(stderr, "h_cfgrammar failed\n"); return 1; } h_pprint_grammar(stdout, g, 0); printf("\n==== D F A ====\n"); HLRDFA *dfa = h_lr0_dfa(g); if (dfa) { h_pprint_lrdfa(stdout, g, dfa, 0); } else { fprintf(stderr, "h_lalr_dfa failed\n"); } printf("\n==== L R ( 0 ) T A B L E ====\n"); HLRTable *table0 = h_lr0_table(g, dfa); if (table0) { h_pprint_lrtable(stdout, g, table0, 0); } else { fprintf(stderr, "h_lr0_table failed\n"); } h_lrtable_free(table0); printf("\n==== L A L R T A B L E ====\n"); if (h_compile(p, PB_LALR, NULL)) { fprintf(stderr, "does not compile\n"); return 2; } h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0); printf("\n==== P A R S E R E S U L T ====\n"); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13); if (res) { h_pprint(stdout, res->ast, 0, 2); } else { printf("no parse\n"); } return 0; }
void dnp3_p_init_transport(void) { H_RULE(bit, h_bits(1, false)); H_RULE(byte, h_uint8()); H_RULE(fir, bit); H_RULE(fin, bit); H_RULE(seqno, h_bits(6, false)); H_RULE(hdr, h_sequence(fin, fir, seqno, NULL)); // big-endian H_ARULE(segment, h_sequence(hdr, h_many(byte), NULL)); // XXX is there a minimum number of bytes in the transport payload? dnp3_p_transport_segment = segment; }
const HParser* init_domain() { static const HParser *domain = NULL; if (domain) return domain; const HParser *letter = h_choice(h_ch_range('a', 'z'), h_ch_range('A', 'Z'), NULL); const HParser *let_dig = h_choice(letter, h_ch_range('0', '9'), NULL); const HParser *ldh_str = h_many1(h_choice(let_dig, h_ch('-'), NULL)); const HParser *label = h_attr_bool(h_sequence(letter, h_optional(h_sequence(h_optional(ldh_str), let_dig, NULL)), NULL), validate_label); /** * You could write it like this ... * HParser *indirect_subdomain = h_indirect(); * const HParser *subdomain = h_choice(label, * h_sequence(indirect_subdomain, * h_ch('.'), * label, * NULL), * NULL); * h_bind_indirect(indirect_subdomain, subdomain); * * ... but this is easier and equivalent */ const HParser *subdomain = h_sepBy1(label, h_ch('.')); domain = h_choice(subdomain, h_ch(' '), NULL); return domain; }
/*
 * Regression test: h_many over a two-element sequence must yield one nested
 * pair per repetition on each of the LL(k), LALR and GLR backends.
 */
static void test_cfg_many_seq(void)
{
  HParser *ab_pairs = h_many(h_sequence(h_ch('A'), h_ch('B'), NULL));

  // A faulty reshape on h_many used to flatten the result into
  // (u0x41 u0x42 u0x41 u0x42) instead of keeping the pairs nested.
  g_check_parse_match(ab_pairs, PB_LLk,  "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
  g_check_parse_match(ab_pairs, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
  g_check_parse_match(ab_pairs, PB_GLR,  "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
}
void dnp3_p_init_binoutcmd(void) { H_RULE (bit, h_bits(1, false)); H_RULE (cs, bit); H_RULE (status, h_bits(7, false)); H_ARULE(notime, h_sequence(status, cs, NULL)); H_ARULE(abstime, h_sequence(status, cs, dnp3_p_dnp3time, NULL)); H_RULE (tcc, h_int_range(h_bits(2, false), 0, 2)); H_ARULE(crob, h_sequence(h_bits(4, false), // op type bit, // queue flag (obsolete) bit, // clear flag tcc, h_uint8(), // count h_uint32(), // on-time [ms] h_uint32(), // off-time [ms] status, // 7 bits dnp3_p_reserved(1), NULL)); H_ARULE(packed, bit); // group 12 (binary output commands)... dnp3_p_g12v1_binoutcmd_crob_oblock = dnp3_p_oblock(G_V(BINOUTCMD, CROB), crob); dnp3_p_g12v2_binoutcmd_pcb_oblock = dnp3_p_single(G_V(BINOUTCMD, PCB), crob); dnp3_p_g12v3_binoutcmd_pcm_oblock = dnp3_p_oblock_packed(G_V(BINOUTCMD, PCM), packed); dnp3_p_g12v3_binoutcmd_pcm_rblock = dnp3_p_specific_rblock(G_V(BINOUTCMD, PCM)); dnp3_p_binoutcmd_rblock = dnp3_p_rblock(G(BINOUTCMD), V(BINOUTCMD, CROB), V(BINOUTCMD, PCB), V(BINOUTCMD, PCM), 0); // group 13 (binary output command events)... H_RULE(oblock_notime, dnp3_p_oblock(G_V(BINOUTCMDEV, NOTIME), notime)); H_RULE(oblock_abstime, dnp3_p_oblock(G_V(BINOUTCMDEV, ABSTIME), abstime)); dnp3_p_binoutcmdev_rblock = dnp3_p_rblock(G(BINOUTCMDEV), V(BINOUTCMDEV, NOTIME), V(BINOUTCMDEV, ABSTIME), 0); dnp3_p_binoutcmdev_oblock = h_choice(oblock_notime, oblock_abstime, NULL); }
HParser* init_domain() { static HParser *ret = NULL; if (ret) return ret; H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); H_VARULE(label, h_sequence(letter, h_optional(h_sequence(h_optional(ldh_str), let_dig, NULL)), NULL)); H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL)); ret = domain; return ret; }
/*
 * Regression test for the LL(k) backend mistaking end-of-input for a zero
 * byte.  Exercises parsers that expect a NUL character, with and without a
 * leading 'a' and a trailing end-of-input check.
 */
static void test_llk_zero_end(void)
{
  HParserBackend be = PB_LLk;
  HParser *z = h_ch('\x00');
  HParser *az = h_sequence(h_ch('a'), z, NULL);
  HParser *ze = h_sequence(z, h_end_p(), NULL);
  HParser *aze = h_sequence(h_ch('a'), z, h_end_p(), NULL);

  // some cases surrounding the bug
  g_check_parse_match (z, be, "\x00", 1, "u0");
  g_check_parse_failed(z, be, "", 0);
  g_check_parse_match (ze, be, "\x00", 1, "(u0)");
  // The literal is split on purpose: a hex escape consumes every following
  // hex digit, so "\x00b" would be the single byte 0x0B rather than NUL
  // followed by 'b'.
  g_check_parse_failed(ze, be, "\x00" "b", 2);
  g_check_parse_failed(ze, be, "", 0);
  g_check_parse_match (az, be, "a\x00", 2, "(u0x61 u0)");
  g_check_parse_match (aze, be, "a\x00", 2, "(u0x61 u0)");
  g_check_parse_failed(aze, be, "a\x00" "b", 3);   // split for the same reason

  // the following should not parse but did when the LL(k) backend failed to
  // check for the end of input, mistaking it for a zero character.
  g_check_parse_failed(az, be, "a", 1);
  g_check_parse_failed(aze, be, "a", 1);
}
void init_parser(void) { // CORE HParser *digit = h_ch_range(0x30, 0x39); HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. HParser *plus = h_ch('+'); HParser *slash = h_ch('/'); HParser *equals = h_ch('='); HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); HParser *base64_3 = h_repeat_n(bsfdig, 4); HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); HParser *base64 = h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL); document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); }
void main(void) { char line[100]; int done; printf("\nH-Sequence Testing Program (Recursive Version)"); printf("\n=============================================="); for (done = NO; !done; ) { printf("\n\nInput a string of 0 and 1 --> "); if (gets(line) != NULL) if (h_sequence(line) == YES) printf("\n*** Input is a H sequence ***"); else printf("\n*** Input is NOT a H sequence ***"); else done = YES; } }
/*
 * Regression test: an ambiguous grammar whose conflicting item has a charset
 * on its left-hand side must be rejected by the LALR compile step instead of
 * aborting.
 */
static void test_lalr_charset_lhs(void)
{
  HParserBackend lalr = PB_LALR;

  HParser *ambiguous = h_many(h_choice(h_sequence(h_ch('A'), h_ch('B'), NULL),
                                       h_in((uint8_t*)"AB",2),
                                       NULL));

  // the above would abort because of an unhandled case in trying to resolve
  // a conflict where an item's left-hand-side was an HCF_CHARSET.
  // however, the compile should fail - the conflict cannot be resolved.
  if (h_compile(ambiguous, lalr, NULL) != 0) {
    return;   // compile was rejected, which is the expected outcome
  }

  g_test_message("LALR compile didn't detect ambiguous grammar");

  // it says it compiled it - well, then it should parse it!
  // (this helps us see what it thinks it should be doing.)
  g_check_parse_match(ambiguous, lalr, "AA",2, "(u0x41 u0x41)");
  g_check_parse_match(ambiguous, lalr, "AB",2, "((u0x41 u0x42))");
  g_test_fail();
}
void dnp3_p_init_counter(void) { H_RULE (bit, h_bits(1,false)); H_RULE (ignore, h_ignore(bit)); H_RULE (reserved, dnp3_p_reserved(1)); H_ARULE(flags, h_sequence(bit, // ONLINE bit, // RESTART bit, // COMM_LOST bit, // REMOTE_FORCED bit, // LOCAL_FORCED ignore, // (ROLLOVER - obsolete) bit, // DISCONTINUITY reserved, NULL)); H_RULE (val32, h_uint32()); H_RULE (val16, h_uint16()); H_ARULE(ctr32, val32); H_ARULE(ctr16, val16); H_ARULE(ctr32_flag, h_sequence(flags, val32, NULL)); H_ARULE(ctr16_flag, h_sequence(flags, val16, NULL)); H_ARULE(ctr32_flag_t, h_sequence(flags, val32, dnp3_p_dnp3time, NULL)); H_ARULE(ctr16_flag_t, h_sequence(flags, val16, dnp3_p_dnp3time, NULL)); // group 20: counters... H_RULE(oblock_32bit_flag, dnp3_p_oblock(G_V(CTR, 32BIT), ctr32_flag)); H_RULE(oblock_16bit_flag, dnp3_p_oblock(G_V(CTR, 16BIT), ctr16_flag)); H_RULE(oblock_32bit_noflag, dnp3_p_oblock(G_V(CTR, 32BIT_NOFLAG), ctr32)); H_RULE(oblock_16bit_noflag, dnp3_p_oblock(G_V(CTR, 16BIT_NOFLAG), ctr16)); dnp3_p_ctr_rblock = dnp3_p_rblock(G(CTR), V(CTR, 32BIT), V(CTR, 16BIT), V(CTR, 32BIT_NOFLAG), V(CTR, 32BIT_NOFLAG), 0); dnp3_p_ctr_fblock = dnp3_p_specific_rblock(G(CTR), DNP3_VARIATION_ANY); dnp3_p_ctr_oblock = h_choice(oblock_32bit_flag, oblock_16bit_flag, oblock_32bit_noflag, oblock_16bit_noflag, NULL); // group 21: frozen counters... 
H_RULE(oblock_frz32bit_flag, dnp3_p_oblock(G_V(FROZENCTR, 32BIT), ctr32_flag)); H_RULE(oblock_frz16bit_flag, dnp3_p_oblock(G_V(FROZENCTR, 16BIT), ctr16_flag)); H_RULE(oblock_frz32bit_flag_t, dnp3_p_oblock(G_V(FROZENCTR, 32BIT_TIME), ctr32_flag_t)); H_RULE(oblock_frz16bit_flag_t, dnp3_p_oblock(G_V(FROZENCTR, 16BIT_TIME), ctr16_flag_t)); H_RULE(oblock_frz32bit_noflag, dnp3_p_oblock(G_V(FROZENCTR, 32BIT_NOFLAG), ctr32)); H_RULE(oblock_frz16bit_noflag, dnp3_p_oblock(G_V(FROZENCTR, 16BIT_NOFLAG), ctr16)); dnp3_p_frozenctr_rblock = dnp3_p_rblock(G(FROZENCTR), V(FROZENCTR, 32BIT), V(FROZENCTR, 16BIT), V(FROZENCTR, 32BIT_TIME), V(FROZENCTR, 16BIT_TIME), V(FROZENCTR, 32BIT_NOFLAG), V(FROZENCTR, 32BIT_NOFLAG), 0); dnp3_p_frozenctr_oblock = h_choice(oblock_frz32bit_flag, oblock_frz16bit_flag, oblock_frz32bit_flag_t, oblock_frz16bit_flag_t, oblock_frz32bit_noflag, oblock_frz16bit_noflag, NULL); // group 22: counter events... H_RULE(oblock_ev32bit_flag, dnp3_p_oblock(G_V(CTREV, 32BIT), ctr32_flag)); H_RULE(oblock_ev16bit_flag, dnp3_p_oblock(G_V(CTREV, 16BIT), ctr16_flag)); H_RULE(oblock_ev32bit_flag_t, dnp3_p_oblock(G_V(CTREV, 32BIT_TIME), ctr32_flag_t)); H_RULE(oblock_ev16bit_flag_t, dnp3_p_oblock(G_V(CTREV, 16BIT_TIME), ctr16_flag_t)); dnp3_p_ctrev_rblock = dnp3_p_rblock(G(CTREV), V(CTREV, 32BIT), V(CTREV, 16BIT), V(CTREV, 32BIT_TIME), V(CTREV, 16BIT_TIME), 0); dnp3_p_ctrev_oblock = h_choice(oblock_ev32bit_flag, oblock_ev16bit_flag, oblock_ev32bit_flag_t, oblock_ev16bit_flag_t, NULL); // group 21: frozen counter events... 
H_RULE(oblock_frzev32bit_flag, dnp3_p_oblock(G_V(FROZENCTREV, 32BIT), ctr32_flag)); H_RULE(oblock_frzev16bit_flag, dnp3_p_oblock(G_V(FROZENCTREV, 16BIT), ctr16_flag)); H_RULE(oblock_frzev32bit_flag_t, dnp3_p_oblock(G_V(FROZENCTREV, 32BIT_TIME), ctr32_flag_t)); H_RULE(oblock_frzev16bit_flag_t, dnp3_p_oblock(G_V(FROZENCTREV, 16BIT_TIME), ctr16_flag_t)); dnp3_p_frozenctrev_rblock = dnp3_p_rblock(G(FROZENCTREV), V(FROZENCTREV, 32BIT), V(FROZENCTREV, 16BIT), V(FROZENCTREV, 32BIT_TIME), V(FROZENCTREV, 16BIT_TIME), 0); dnp3_p_frozenctrev_oblock = h_choice(oblock_frzev32bit_flag, oblock_frzev16bit_flag, oblock_frzev32bit_flag_t, oblock_frzev16bit_flag_t, NULL); }
const HParser* init_rdata(uint16_t type) { static const HParser *parsers[RDATA_TYPE_MAX+1]; static int inited = 0; if (type >= sizeof(parsers)) return NULL; if (inited) return parsers[type]; H_RULE (domain, init_domain()); H_ARULE(cstr, init_character_string()); H_RULE (a, h_uint32()); H_RULE (ns, domain); H_RULE (md, domain); H_RULE (mf, domain); H_RULE (cname, domain); H_ARULE(soa, h_sequence(domain, // MNAME domain, // RNAME h_uint32(), // SERIAL h_uint32(), // REFRESH h_uint32(), // RETRY h_uint32(), // EXPIRE h_uint32(), // MINIMUM NULL)); H_RULE (mb, domain); H_RULE (mg, domain); H_RULE (mr, domain); H_VRULE(null, h_many(h_uint8())); H_RULE (wks, h_sequence(h_uint32(), h_uint8(), h_many(h_uint8()), NULL)); H_RULE (ptr, domain); H_RULE (hinfo, h_sequence(cstr, cstr, NULL)); H_RULE (minfo, h_sequence(domain, domain, NULL)); H_RULE (mx, h_sequence(h_uint16(), domain, NULL)); H_ARULE(txt, h_many1(cstr)); parsers[ 0] = NULL; // there is no type 0 parsers[ 1] = a; parsers[ 2] = ns; parsers[ 3] = md; parsers[ 4] = mf; parsers[ 5] = cname; parsers[ 6] = soa; parsers[ 7] = mb; parsers[ 8] = mg; parsers[ 9] = mr; parsers[10] = null; parsers[11] = wks; parsers[12] = ptr; parsers[13] = hinfo; parsers[14] = minfo; parsers[15] = mx; parsers[16] = txt; // All parsers must consume their input exactly. for(uint16_t i; i<sizeof(parsers); i++) { if(parsers[i]) { parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL), act_index0); } } inited = 1; return parsers[type]; }