示例#1
0
/*
 * Construct the grammar for a base64 document surrounded by optional
 * whitespace and hand back its top-level rule.
 *
 * H_RULE / H_ARULE are project macros defined outside this snippet; each one
 * declares a local parser variable named after its first argument.
 * NOTE(review): H_ARULE presumably also attaches a semantic action derived
 * from the rule name, so the rule names must not be changed -- confirm
 * against the macro definitions.
 */
const HParser *init_parser(void)
{
    // Basic character classes (ASCII).
    H_RULE (digit,   h_ch_range('0', '9'));
    H_RULE (alpha,   h_choice(h_ch_range('A', 'Z'),
                              h_ch_range('a', 'z'),
                              NULL));
    H_RULE (space,   h_in((uint8_t *)" \t\n\r\f\v", 6));

    // Single punctuation characters used by base64.
    H_RULE (plus,    h_ch('+'));
    H_RULE (slash,   h_ch('/'));
    H_ARULE(equals,  h_ch('='));

    // Any base64 digit, plus the restricted digit sets that may precede
    // one or two '=' padding characters.
    H_ARULE(bsfdig,       h_choice(alpha, digit, plus, slash, NULL));
    H_ARULE(bsfdig_4bit,  h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
    H_ARULE(bsfdig_2bit,  h_in((uint8_t *)"AQgw", 4));

    // Full group of four digits, and the two padded tail forms.
    H_ARULE(base64_3,     h_repeat_n(bsfdig, 4));
    H_ARULE(base64_2,     h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
    H_ARULE(base64_1,     h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));

    // Any number of full groups, optionally followed by one padded tail.
    H_ARULE(base64,       h_sequence(h_many(base64_3),
                                     h_optional(h_choice(base64_2, base64_1, NULL)),
                                     NULL));

    // Whole input: whitespace, payload, whitespace, end of input.
    H_ARULE(ws,           h_many(space));
    H_ARULE(document,     h_sequence(ws, base64, ws, h_end_p(), NULL));

    // Known issue: some inputs that ought to parse are rejected.
    // It *seemed* to show up mostly with inputs such as "bbbbaaaaBA==",
    // and attaching fewer actions seemed to make it less likely.

    return document;
}
示例#2
0
文件: lalr.c 项目: michiexile/hammer
// dummy!
/*
 * Debugging driver for the LR code: builds a small left-recursive expression
 * grammar and prints, in order, the grammar, the LR(0) DFA, the LR(0) table,
 * the LALR table and the parse result for a sample input.
 *
 * Returns 0 when it runs to the end, 1 if the grammar cannot be constructed
 * and 2 if compiling the parser with the LALR backend fails.
 */
int test_lalr(void)
{
  HAllocator *mm__ = &system_allocator;

  /* 
     E -> E '-' T
        | T
     T -> '(' E ')'
        | 'n'               -- also try [0-9] for the charset paths
  */

  HParser *n = h_ch('n');
  HParser *E = h_indirect();
  HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
  HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
  h_bind_indirect(E, E_);
  HParser *p = E;

  printf("\n==== G R A M M A R ====\n");
  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p));
  if (g == NULL) {
    fprintf(stderr, "h_cfgrammar failed\n");
    return 1;
  }
  h_pprint_grammar(stdout, g, 0);

  printf("\n==== D F A ====\n");
  HLRDFA *dfa = h_lr0_dfa(g);
  if (dfa) {
    h_pprint_lrdfa(stdout, g, dfa, 0);
  } else {
    // The message names the function that was actually called.
    fprintf(stderr, "h_lr0_dfa failed\n");
  }

  printf("\n==== L R ( 0 )  T A B L E ====\n");
  if (dfa == NULL) {
    // Without a DFA there is nothing to build the table from; do not hand a
    // null pointer to h_lr0_table.
    fprintf(stderr, "h_lr0_table skipped: no DFA\n");
  } else {
    HLRTable *table0 = h_lr0_table(g, dfa);
    if (table0) {
      h_pprint_lrtable(stdout, g, table0, 0);
      // Only release a table that was actually created.
      h_lrtable_free(table0);
    } else {
      fprintf(stderr, "h_lr0_table failed\n");
    }
  }

  printf("\n==== L A L R  T A B L E ====\n");
  // A non-zero result from h_compile is treated as failure.
  if (h_compile(p, PB_LALR, NULL)) {
    fprintf(stderr, "does not compile\n");
    return 2;
  }
  h_pprint_lrtable(stdout, g, (HLRTable *)p->backend_data, 0);

  printf("\n==== P A R S E  R E S U L T ====\n");
  HParseResult *res = h_parse(p, (uint8_t *)"n-(n-((n)))-n", 13);
  if (res) {
    h_pprint(stdout, res->ast, 0, 2);
  } else {
    printf("no parse\n");
  }
  return 0;
}
示例#3
0
// Regression test: h_many over a two-element sequence must keep every
// repetition as its own nested sequence in the result. The same input and
// expected output are checked with the LL(k), LALR and GLR backends.
static void test_cfg_many_seq(void) {
    // Zero or more repetitions of the pair 'A' 'B'.
    HParser *p = h_many(h_sequence(h_ch('A'), h_ch('B'), NULL));

    g_check_parse_match(p, PB_LLk,  "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    g_check_parse_match(p, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    g_check_parse_match(p, PB_GLR,  "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    // Before the fix, these parsed as the flattened (u0x41 u0x42 u0x41 u0x42)
    // because of a faulty reshape on h_many.
}
示例#4
0
// Checks that the bit_length reported in the parse result is 16 (both input
// bytes of "ab") for a parser built with h_right and for one built with
// h_bind.
// NOTE(review): r is dereferenced without a NULL check; if h_parse can return
// NULL on failure, a failed parse would crash here instead of being recorded
// as a test failure -- confirm.
static void test_wrong_bit_length(void) {
    HParseResult *r;
    HParser *p;

    // Case 1: h_right over 'a' and 'b'.
    p = h_right(h_ch('a'), h_ch('b'));
    r = h_parse(p, (const uint8_t *)"ab", 2);
    g_check_cmp_int64(r->bit_length, ==, 16);
    h_parse_result_free(r);

    // Case 2: 'a' bound to the continuation k_test_wrong_bit_length, which is
    // defined outside this snippet (presumably it yields a parser for 'b').
    p = h_bind(h_ch('a'), k_test_wrong_bit_length, NULL);
    r = h_parse(p, (const uint8_t *)"ab", 2);
    g_check_cmp_int64(r->bit_length, ==, 16);
    h_parse_result_free(r);
}
示例#5
0
/*
 * Return the parser for a domain name, building it on the first call and
 * handing back the same cached instance on every later call.
 *
 * A domain is either a dot-separated list of labels or a single space.
 * Each label must start with a letter, may continue with letters, digits
 * and hyphens, must end with a letter or digit when longer than one
 * character, and is additionally filtered through validate_label.
 */
const HParser* init_domain() {
  static const HParser *cached = NULL;

  if (cached == NULL) {
    /* Character classes. */
    const HParser *alpha = h_choice(h_ch_range('a', 'z'),
                                    h_ch_range('A', 'Z'),
                                    NULL);
    const HParser *alnum = h_choice(alpha, h_ch_range('0', '9'), NULL);

    /* One or more letters, digits or hyphens. */
    const HParser *alnum_hyphen_run = h_many1(h_choice(alnum, h_ch('-'), NULL));

    /* Optional remainder of a label: an optional run, then a final
     * letter or digit. */
    const HParser *label_rest = h_optional(h_sequence(h_optional(alnum_hyphen_run),
                                                      alnum,
                                                      NULL));

    const HParser *label = h_attr_bool(h_sequence(alpha, label_rest, NULL),
                                       validate_label);

    /*
     * The subdomain rule could be spelled out as explicit left recursion:
     *
     *   HParser *indirect_subdomain = h_indirect();
     *   const HParser *subdomain = h_choice(label,
     *                                       h_sequence(indirect_subdomain,
     *                                                  h_ch('.'),
     *                                                  label,
     *                                                  NULL),
     *                                       NULL);
     *   h_bind_indirect(indirect_subdomain, subdomain);
     *
     * A separated list is simpler to write and is equivalent.
     */
    const HParser *labels = h_sepBy1(label, h_ch('.'));

    cached = h_choice(labels, h_ch(' '), NULL);
  }

  return cached;
}
示例#6
0
/*
 * Return the domain-name parser, constructing it on the first call only;
 * later calls return the same cached pointer.
 *
 * H_RULE / H_VARULE / H_ARULE are project macros defined outside this
 * snippet; each declares a local parser named after its first argument.
 * NOTE(review): the V/A variants presumably attach a validation and/or an
 * action derived from the rule name, so rule names are left as they are --
 * confirm against the macro definitions.
 */
HParser* init_domain() {
  static HParser *cached = NULL;

  if (!cached) {
    /* Character classes. */
    H_RULE  (letter,    h_choice(h_ch_range('a','z'),
                                 h_ch_range('A','Z'),
                                 NULL));
    H_RULE  (let_dig,   h_choice(letter,
                                 h_ch_range('0','9'),
                                 NULL));

    /* One or more letters, digits or hyphens. */
    H_RULE  (ldh_str,   h_many1(h_choice(let_dig, h_ch('-'), NULL)));

    /* A letter, optionally followed by an optional run and a closing
     * letter or digit. */
    H_VARULE(label,     h_sequence(letter,
                                   h_optional(h_sequence(h_optional(ldh_str),
                                                         let_dig,
                                                         NULL)),
                                   NULL));

    /* Dot-separated labels; the whole domain may instead be one space. */
    H_RULE  (subdomain, h_sepBy1(label, h_ch('.')));
    H_ARULE (domain,    h_choice(subdomain, h_ch(' '), NULL));

    cached = domain;
  }

  return cached;
}
示例#7
0
// Regression test for the LL(k) backend confusing end of input with a zero
// byte. Exercises a lone NUL parser and sequences that combine 'a', NUL and
// an end-of-input check.
static void test_llk_zero_end(void) {
    HParserBackend be = PB_LLk;
    HParser *z = h_ch('\x00');
    HParser *az = h_sequence(h_ch('a'), z, NULL);
    HParser *ze = h_sequence(z, h_end_p(), NULL);
    HParser *aze = h_sequence(h_ch('a'), z, h_end_p(), NULL);

    // some cases surrounding the bug
    //
    // Inputs with a byte after the NUL are written as "\x00" "b": a hex
    // escape consumes every following hex digit, so "\x00b" would be the
    // single byte 0x0B rather than NUL followed by 'b', and the check would
    // fail on the first byte instead of on the trailing one.
    g_check_parse_match (z, be, "\x00", 1, "u0");
    g_check_parse_failed(z, be, "", 0);
    g_check_parse_match (ze, be, "\x00", 1, "(u0)");
    g_check_parse_failed(ze, be, "\x00" "b", 2);
    g_check_parse_failed(ze, be, "", 0);
    g_check_parse_match (az, be, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_match (aze, be, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_failed(aze, be, "a\x00" "b", 3);

    // the following should not parse but did when the LL(k) backend failed to
    // check for the end of input, mistaking it for a zero character.
    g_check_parse_failed(az, be, "a", 1);
    g_check_parse_failed(aze, be, "a", 1);
}
示例#8
0
// Regression test: compiling an ambiguous grammar with the LALR backend must
// be rejected. The grammar below used to abort inside conflict resolution
// when an item's left-hand side was an HCF_CHARSET.
static void test_lalr_charset_lhs(void) {
    HParserBackend be = PB_LALR;

    // Either the pair "AB" or any single character from {A, B}, repeated.
    HParser *ab_pair  = h_sequence(h_ch('A'), h_ch('B'), NULL);
    HParser *a_or_b   = h_in((uint8_t*)"AB",2);
    HParser *p = h_many(h_choice(ab_pair, a_or_b, NULL));

    // The conflict cannot be resolved, so a non-zero (failed) compile is the
    // expected outcome and there is nothing more to check.
    if (h_compile(p, be, NULL) != 0) {
        return;
    }

    g_test_message("LALR compile didn't detect ambiguous grammar");

    // The backend claims the grammar compiled, so make it parse something;
    // the output shows what it believes the grammar means.
    g_check_parse_match(p, be, "AA",2, "(u0x41 u0x41)");
    g_check_parse_match(p, be, "AB",2, "((u0x41 u0x42))");

    g_test_fail();
}
示例#9
0
文件: base64.c 项目: JakobR/hammer
/*
 * Build the base64 grammar and store its top-level parser in `document`
 * (declared outside this function).
 */
void init_parser(void)
{
    // Character classes (ASCII).
    HParser *numeral = h_ch_range('0', '9');
    HParser *letter  = h_choice(h_ch_range('A', 'Z'),
                                h_ch_range('a', 'z'),
                                NULL);

    // Punctuation used by the encoding.
    HParser *plus_sign   = h_ch('+');
    HParser *slash_sign  = h_ch('/');
    HParser *pad         = h_ch('=');

    // Any base64 digit, and the restricted digit sets allowed directly
    // before one or two padding characters.
    HParser *any_digit   = h_choice(letter, numeral, plus_sign, slash_sign, NULL);
    HParser *digit_4bit  = h_in((uint8_t *)"AEIMQUYcgkosw048", 16);
    HParser *digit_2bit  = h_in((uint8_t *)"AQgw", 4);

    // A full four-digit group and the two padded tail forms.
    HParser *full_group  = h_repeat_n(any_digit, 4);
    HParser *tail_1pad   = h_sequence(any_digit, any_digit, digit_4bit, pad, NULL);
    HParser *tail_2pad   = h_sequence(any_digit, digit_2bit, pad, pad, NULL);

    // Any number of full groups, then at most one padded tail.
    HParser *payload = h_sequence(h_many(full_group),
                                  h_optional(h_choice(tail_1pad, tail_2pad, NULL)),
                                  NULL);

    // The payload and the end-of-input check are each wrapped in
    // h_whitespace.
    document = h_sequence(h_whitespace(payload), h_whitespace(h_end_p()), NULL);
}
示例#10
0
// Benchmarks a comma-separated list of the digits 1-3 against `testcases`
// (defined outside this function) and writes the report to stderr.
static void test_benchmark_1() {
  HParser *item      = h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL);
  HParser *separator = h_ch(',');
  HParser *list      = h_sepBy1(item, separator);

  HBenchmarkResults *results = h_benchmark(list, testcases);
  h_benchmark_report(stderr, results);
}
示例#11
0
文件: base64.c 项目: kmitwork/hammer
/*
 * Build a reduced base64 grammar that accepts only a padded tail group and
 * store it in `document` (declared outside this function).
 */
void init_parser(void)
{
    // Character classes (ASCII).
    const HParser *numeral = h_ch_range('0', '9');
    const HParser *letter  = h_choice(h_ch_range('A', 'Z'),
                                      h_ch_range('a', 'z'),
                                      NULL);

    // Punctuation used by the encoding.
    const HParser *plus_sign  = h_ch('+');
    const HParser *slash_sign = h_ch('/');
    const HParser *pad        = h_ch('=');

    // Any base64 digit.
    const HParser *any_digit = h_choice(letter, numeral, plus_sign, slash_sign, NULL);

    // Digits allowed directly before a single '=' ...
    const HParser *digit_4bit = h_choice(
        h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'),
        h_ch('Q'), h_ch('U'), h_ch('Y'), h_ch('c'),
        h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'),
        h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'),
        NULL);

    // ... and directly before "==".
    const HParser *digit_2bit = h_choice(h_ch('A'), h_ch('Q'),
                                         h_ch('g'), h_ch('w'),
                                         NULL);

    // The two padded tail forms.
    const HParser *tail_1pad = h_sequence(any_digit, any_digit, digit_4bit, pad, NULL);
    const HParser *tail_2pad = h_sequence(any_digit, digit_2bit, pad, pad, NULL);

    const HParser *tail = h_choice(tail_1pad, tail_2pad, NULL);
        // Open questions from the original author:
        //   why does this parse "A=="?!
        //   why does this parse "aaA=" but not "aA=="?!

    document = tail;
}