static void test_bug118(void) { // https://github.com/UpstandingHackers/hammer/issues/118 // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 HParseResult* p; const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; #define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); #undef MY_ENDIAN H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); p = h_parse(parser_weird, input, 6); g_check_cmp_int32(p->bit_length, ==, 44); h_parse_result_free(p); p = h_parse(parser_ok, input, 6); g_check_cmp_int32(p->bit_length, ==, 40); h_parse_result_free(p); }
/*
 * Checks that the bit length reported for a successful parse of "ab" is
 * 16, first through h_right() and then through h_bind().
 */
static void test_wrong_bit_length(void) {
  const uint8_t *text = (const uint8_t *)"ab";
  const size_t text_len = 2;

  /* h_right: match 'a' then 'b'. */
  HParser *parser = h_right(h_ch('a'), h_ch('b'));
  HParseResult *r = h_parse(parser, text, text_len);
  g_check_cmp_int64(r->bit_length, ==, 16);
  h_parse_result_free(r);

  /* h_bind: match 'a', then continue with the parser produced by
   * k_test_wrong_bit_length (defined elsewhere in this file). */
  parser = h_bind(h_ch('a'), k_test_wrong_bit_length, NULL);
  r = h_parse(parser, text, text_len);
  g_check_cmp_int64(r->bit_length, ==, 16);
  h_parse_result_free(r);
}
// dummy! int test_lalr(void) { HAllocator *mm__ = &system_allocator; /* E -> E '-' T | T T -> '(' E ')' | 'n' -- also try [0-9] for the charset paths */ HParser *n = h_ch('n'); HParser *E = h_indirect(); HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); h_bind_indirect(E, E_); HParser *p = E; printf("\n==== G R A M M A R ====\n"); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); if (g == NULL) { fprintf(stderr, "h_cfgrammar failed\n"); return 1; } h_pprint_grammar(stdout, g, 0); printf("\n==== D F A ====\n"); HLRDFA *dfa = h_lr0_dfa(g); if (dfa) { h_pprint_lrdfa(stdout, g, dfa, 0); } else { fprintf(stderr, "h_lalr_dfa failed\n"); } printf("\n==== L R ( 0 ) T A B L E ====\n"); HLRTable *table0 = h_lr0_table(g, dfa); if (table0) { h_pprint_lrtable(stdout, g, table0, 0); } else { fprintf(stderr, "h_lr0_table failed\n"); } h_lrtable_free(table0); printf("\n==== L A L R T A B L E ====\n"); if (h_compile(p, PB_LALR, NULL)) { fprintf(stderr, "does not compile\n"); return 2; } h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0); printf("\n==== P A R S E R E S U L T ====\n"); HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13); if (res) { h_pprint(stdout, res->ast, 0, 2); } else { printf("no parse\n"); } return 0; }
/*
 * Reads up to sizeof(input) bytes from stdin, echoes them to stderr, and
 * parses them with the `document` parser.
 *
 * On success, reports the number of bytes consumed on stderr, pretty-prints
 * the AST to stdout, and returns 0. Returns 1 if the input does not parse.
 */
int main(int argc, char **argv) {
  uint8_t input[102400];
  size_t inputsize;
  const HParseResult *result;

  (void)argc;
  (void)argv;

  init_parser();

  inputsize = fread(input, 1, sizeof(input), stdin);

  // %zu is the conversion for size_t; %lu is only correct where size_t
  // happens to be unsigned long.
  fprintf(stderr, "inputsize=%zu\ninput=", inputsize);
  fwrite(input, 1, inputsize, stderr);

  result = h_parse(document, input, inputsize);
  if (!result) {
    return 1;
  }

  // Cast explicitly so the argument matches %lld whatever integer type
  // bit_length is declared with.
  fprintf(stderr, "parsed=%lld bytes\n", (long long)(result->bit_length / 8));
  h_pprint(stdout, result->ast, 0, 0);
  return 0;
}
/*
 * Benchmarks `parser` against `testcases` for every backend in the range
 * PB_MIN..PB_MAX.
 *
 * For each backend the parser is compiled, every testcase is checked
 * against its expected unambiguous output, and, only if all of them pass,
 * each testcase is timed. Progress and failures are written to stderr.
 *
 * `testcases` is an array terminated by an entry whose `input` is NULL.
 *
 * Returns a newly allocated HBenchmarkResults with one HBackendResults
 * entry per backend. An entry's `cases` is NULL when the backend failed to
 * compile or failed any testcase.
 */
HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases) {
  // Minimum measured time per testcase, in nanoseconds (100 ms).
  enum { MIN_SAMPLE_NS = 100000000 };

  // For now, just output the results to stderr
  HParserTestcase* tc = testcases;
  HParserBackend backend = PB_MIN;
  HBenchmarkResults *ret = h_new(HBenchmarkResults, 1);
  ret->len = PB_MAX-PB_MIN+1;
  ret->results = h_new(HBackendResults, ret->len);

  for (backend = PB_MIN; backend <= PB_MAX; backend++) {
    // The array holds PB_MAX-PB_MIN+1 entries, so index relative to PB_MIN.
    HBackendResults *br = &ret->results[backend - PB_MIN];

    br->backend = backend;
    br->n_testcases = 0;
    br->failed_testcases = 0;
    // Stays NULL unless this backend makes it to the timing phase.
    br->cases = NULL;

    // Step 1: Compile grammar for given parser...
    if (h_compile(parser, backend, NULL) == -1) {
      // backend inappropriate for grammar...
      fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]);
      br->compile_success = false;
      continue;
    }
    fprintf(stderr, "Compiled for %s\n", HParserBackendNames[backend]);
    br->compile_success = true;

    // Step 2: verify all test cases.
    for (tc = testcases; tc->input != NULL; tc++) {
      br->n_testcases++;

      HParseResult *res = h_parse(parser, tc->input, tc->length);
      char *res_unamb = NULL;
      if (res != NULL) {
        res_unamb = h_write_result_unamb(res->ast);
      }

      const char *expected = tc->output_unambiguous;
      bool mismatch;
      if (res_unamb == NULL || expected == NULL) {
        // Passes only if both are absent. Never hand NULL to strcmp().
        mismatch = !(res_unamb == NULL && expected == NULL);
      } else {
        mismatch = strcmp(res_unamb, expected) != 0;
      }

      if (mismatch) {
        // test case failed...
        fprintf(stderr, "Parsing with %s failed\n", HParserBackendNames[backend]);
        // We want to run all testcases, for purposes of generating a
        // report. (eg, if users are trying to fix a grammar for a
        // faster backend)
        br->failed_testcases++;
      }

      h_parse_result_free(res);
      free(res_unamb);
    }

    if (br->failed_testcases > 0) {
      // Can't use this parser; skip to the next
      fprintf(stderr, "%s failed testcases; skipping benchmark\n", HParserBackendNames[backend]);
      continue;
    }

    // Step 3: time every testcase.
    br->cases = h_new(HCaseResult, br->n_testcases);
    size_t cur_case = 0;

    for (tc = testcases; tc->input != NULL; tc++) {
      // The goal is to run each testcase for at least 100ms each
      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
      int count = 1, cur;
      struct timespec ts_start, ts_end;
      int64_t time_diff;

      do {
        // Yes, this means that the first run will run the function twice.
        // This is fine, as we want multiple runs anyway.
        count *= 2;

        h_benchmark_clock_gettime(&ts_start);
        for (cur = 0; cur < count; cur++) {
          h_parse_result_free(h_parse(parser, tc->input, tc->length));
        }
        h_benchmark_clock_gettime(&ts_end);

        // time_diff is in ns. Widen the seconds difference before scaling
        // so the multiplication cannot overflow a narrower time_t.
        time_diff = (int64_t)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000
                  + (ts_end.tv_nsec - ts_start.tv_nsec);
      } while (time_diff < MIN_SAMPLE_NS);

      // Average time per parse for this testcase.
      br->cases[cur_case].parse_time = (time_diff / count);
      br->cases[cur_case].length = tc->length;
      cur_case++;
    }
  }

  return ret;
}