/*
 * Difference combinator: succeeds with p1's result unless p2 also matches
 * at the same starting position and p1's token_length() is smaller than p2's.
 *
 * env   - HTwoParsers holding p1 and p2.
 * state - current parse state; whenever p1 succeeded, the input stream is
 *         left at the position reached by p1.
 *
 * Returns p1's result, or NULL when p1 fails or loses the length comparison.
 */
static HParseResult* parse_difference(void *env, HParseState *state) {
  HTwoParsers *pair = (HTwoParsers*)env;

  // Remember where we started so p2 can be tried from the same position.
  HInputStream origin = state->input_stream;

  HParseResult *first = h_do_parse(pair->p1, state);
  if (first == NULL)
    return NULL;                       // p1 failed: nothing to subtract from

  // Remember where p1 stopped; that is the position callers continue from.
  HInputStream past_first = state->input_stream;

  // Rewind and give p2 a chance on the very same input.
  state->input_stream = origin;
  HParseResult *second = h_do_parse(pair->p2, state);

  // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
  state->input_stream = past_first;

  if (second == NULL)
    return first;                      // only p1 matched

  // Both matched: p1 survives only if it is not shorter than p2.
  size_t first_len = token_length(first);
  size_t second_len = token_length(second);
  return (first_len < second_len) ? NULL : first;
}
/*
 * Parse `length` bytes of `input` with `parser`, using allocator `mm__` for
 * the arena that backs the parse state.
 *
 * The input stream starts at byte 0 with bit_offset 8 and big-endian bit and
 * byte order. After the parse the bookkeeping containers (lr_stack,
 * recursion_heads, cache) are freed. The arena itself is deleted only when
 * the parse fails; on success it is left alive.
 *
 * Returns the result of h_do_parse(), or NULL on parse failure.
 */
HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) {
  // Set up a parse state...
  HArena *arena = h_new_arena(mm__, 0);
  HParseState *ps = a_new_(arena, HParseState, 1);

  ps->cache = h_hashtable_new(arena,
                              cache_key_equal,   // key_equal_func
                              cache_key_hash);   // hash_func

  // Describe the input: whole buffer, positioned at its first bit.
  ps->input_stream.input = input;
  ps->input_stream.length = length;
  ps->input_stream.index = 0;
  ps->input_stream.bit_offset = 8;               // bit big endian
  ps->input_stream.overrun = 0;
  ps->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN;

  ps->lr_stack = h_slist_new(arena);
  ps->recursion_heads = h_hashtable_new(arena, cache_key_equal, cache_key_hash);
  ps->arena = arena;

  HParseResult *outcome = h_do_parse(parser, ps);

  // ...and tear it down again.
  h_slist_free(ps->lr_stack);
  h_hashtable_free(ps->recursion_heads);
  h_hashtable_free(ps->cache);

  // A failed parse has no result that could still reference the arena.
  if (outcome == NULL)
    h_delete_arena(ps->arena);

  return outcome;
}
static HParseResult* parse_length_value(void *env, HParseState *state) { HLenVal *lv = (HLenVal*)env; HParseResult *len = h_do_parse(lv->length, state); if (!len) return NULL; if (len->ast->token_type != TT_UINT) errx(1, "Length parser must return an unsigned integer"); // TODO: allocate this using public functions HRepeat repeat = { .p = lv->value, .sep = h_epsilon_p(), .count = len->ast->uint, .min_p = false }; return parse_many(&repeat, state); } static const HParserVtable length_value_vt = { .parse = parse_length_value, .isValidRegular = h_false, .isValidCF = h_false, }; const HParser* h_length_value(const HParser* length, const HParser* value) { return h_length_value__m(&system_allocator, length, value); } const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) { HParser *res = h_new(HParser, 1); res->vtable = &length_value_vt; HLenVal *env = h_new(HLenVal, 1); env->length = length; env->value = value; res->env = (void*)env; return res; }
/*
 * Positive lookahead: succeeds iff the wrapped parser (env) matches, but
 * never consumes input.
 *
 * Returns an empty result (NULL token) on a match, NULL otherwise. The input
 * stream is restored to its entry position in both cases.
 */
static HParseResult *parse_and(void* env, HParseState* state) {
  HInputStream entry = state->input_stream;
  HParseResult *probe = h_do_parse((HParser*)env, state);

  // Lookahead only: rewind regardless of the outcome.
  state->input_stream = entry;

  return probe ? make_result(state->arena, NULL) : NULL;
}
/*
 * Runs the wrapped parser (env) and discards its token.
 *
 * Returns NULL if the wrapped parser fails; otherwise a fresh result whose
 * ast is NULL and whose arena is the parse state's arena. Input consumed by
 * the wrapped parser stays consumed.
 */
static HParseResult* parse_ignore(void* env, HParseState* state) {
  HParseResult *inner = h_do_parse((HParser*)env, state);
  if (inner == NULL)
    return NULL;

  // Success with no token: callers that collect asts will skip this result.
  HParseResult *blank = a_new(HParseResult, 1);
  blank->arena = state->arena;
  blank->ast = NULL;
  return blank;
}
/*
 * Negative lookahead: succeeds iff the wrapped parser (env) does NOT match.
 *
 * Returns NULL when the wrapped parser matches (the input stream is left
 * wherever that parse stopped). Otherwise restores the input stream to its
 * entry position and returns an empty result.
 *
 * NOTE(review): make_result receives the parse state here, while several
 * sibling parsers in this file pass state->arena - confirm against
 * make_result's declaration.
 */
static HParseResult* parse_not(void* env, HParseState* state) {
  HInputStream entry = state->input_stream;

  HParseResult *probe = h_do_parse((HParser*)env, state);
  if (probe)
    return NULL;

  // The wrapped parser failed, so the lookahead succeeds without consuming.
  state->input_stream = entry;
  return make_result(state, NULL);
}
/*
 * Optional combinator: tries the wrapped parser (env) and never fails.
 *
 * Returns the wrapped parser's result when it matches. Otherwise rewinds the
 * input stream to the entry position and returns a result holding a TT_NONE
 * token.
 */
static HParseResult* parse_optional(void* env, HParseState* state) {
  HInputStream entry = state->input_stream;

  HParseResult *inner = h_do_parse((HParser*)env, state);
  if (inner != NULL)
    return inner;

  // No match: undo any consumption and report "nothing" explicitly.
  state->input_stream = entry;
  HParsedToken *none = a_new(HParsedToken, 1);
  none->token_type = TT_NONE;
  return make_result(state->arena, none);
}
/*
 * Attribute filter: runs a->p and keeps its result only if a->pred accepts it.
 *
 * Returns the wrapped parser's result when it succeeded, produced a non-NULL
 * ast, and the predicate returned non-zero; NULL in every other case.
 */
static HParseResult* parse_attr_bool(void *env, HParseState *state) {
  HAttrBool *attr = (HAttrBool*)env;
  HParseResult *inner = h_do_parse(attr->p, state);

  // The predicate is only consulted for results that carry a token.
  if (!inner || !inner->ast)
    return NULL;

  return attr->pred(inner) ? inner : NULL;
}
/*
 * Runs s->p and, on success, stores the result in the symbol table under
 * s->key.
 *
 * Returns NULL without parsing when the parser or key is missing, or when
 * h_symbol_get() already finds something under the key. Otherwise returns
 * the wrapped parser's result (NULL on parse failure, in which case nothing
 * is stored).
 */
static HParseResult* parse_put(void *env, HParseState* state) {
  HStoredValue *stored = (HStoredValue*)env;

  // No parser, no key, or key's stored already.
  if (!stored->p || !stored->key || h_symbol_get(state, stored->key))
    return NULL;

  HParseResult *outcome = h_do_parse(stored->p, state);
  if (outcome)
    h_symbol_put(state, stored->key, outcome);
  return outcome;
}
/*
 * Skips leading whitespace bytes, then runs the wrapped parser (env).
 *
 * Bytes are read 8 bits at a time until one is not whitespace according to
 * isspace(), or until the read overruns the input. The stream is then rewound
 * to just before that last read, so the first non-whitespace byte is still
 * available to the wrapped parser.
 *
 * Returns whatever the wrapped parser returns.
 */
static HParseResult* parse_whitespace(void* env, HParseState *state) {
  char c;
  HInputStream bak;

  do {
    // Snapshot before each read so the terminating byte can be "unread".
    bak = state->input_stream;
    c = h_read_bits(&state->input_stream, 8, false);
    if (state->input_stream.overrun)
      break;
    // <ctype.h> functions require a value representable as unsigned char (or
    // EOF); passing a plain char is undefined for bytes >= 0x80 where char is
    // signed.
  } while (isspace((unsigned char)c));

  state->input_stream = bak;
  return h_do_parse((HParser*)env, state);
}
/*
 * Ordered choice: tries each parser in s->p_array in turn, always from the
 * same starting position, and returns the first successful result.
 *
 * Returns NULL when every alternative fails (or there are none). The stream
 * is rewound before each alternative after the first; it is not rewound after
 * the final failure.
 */
static HParseResult* parse_choice(void *env, HParseState *state) {
  HSequence *alts = (HSequence*)env;
  HInputStream origin = state->input_stream;

  size_t idx = 0;
  while (idx < alts->len) {
    // The first alternative already starts at the origin.
    if (idx > 0)
      state->input_stream = origin;

    HParseResult *hit = h_do_parse(alts->p_array[idx], state);
    if (hit != NULL)
      return hit;

    ++idx;
  }

  // nothing succeeded, so fail
  return NULL;
}
/*
 * Semantic action: runs a->p and, on success, feeds the result together with
 * a->user_data to a->action, wrapping the returned token in a new result.
 *
 * Returns NULL when the parser or the action is missing, or when the wrapped
 * parser fails.
 */
static HParseResult* parse_action(void *env, HParseState *state) {
  HParseAction *act = (HParseAction*)env;

  // either the parser's missing or the action's missing
  if (!act->p || !act->action)
    return NULL;

  HParseResult *inner = h_do_parse(act->p, state);
  if (!inner)
    return NULL;

  HParsedToken *produced = (HParsedToken*)act->action(inner, act->user_data);
  return make_result(state->arena, produced);
}
/*
 * Runs every parser in seq->parsers in order; all must succeed, but only the
 * result at index seq->which is returned.
 *
 * Returns NULL as soon as any parser fails. Also returns NULL if seq->which
 * does not index one of the seq->len parsers, since no result is then kept.
 */
static HParseResult* parse_ignoreseq(void* env, HParseState *state) {
  const HIgnoreSeq *seq = (HIgnoreSeq*)env;
  HParseResult *kept = NULL;

  size_t pos = 0;
  while (pos < seq->len) {
    HParseResult *step = h_do_parse(seq->parsers[pos], state);
    if (step == NULL)
      return NULL;

    // Remember only the one result the caller asked for.
    if (pos == seq->which)
      kept = step;

    ++pos;
  }

  return kept;
}
/*
 * Runs e->p with the input stream temporarily switched to e->endianness.
 *
 * If the stream's endianness already equals the requested one, the wrapped
 * parser is simply run. Otherwise the differing flag bits are toggled for the
 * duration of the parse (calling switch_bit_order() when the bit-order flag
 * is among them) and toggled back afterwards, whether or not the parse
 * succeeded.
 *
 * Returns whatever the wrapped parser returns.
 */
static HParseResult *parse_endianness(void *env, HParseState *state) {
  HParseEndianness *e = env;
  char diff = state->input_stream.endianness ^ e->endianness;

  // all the same, nothing to do
  if (!diff)
    return h_do_parse(e->p, state);

  // Enter the requested endianness...
  if (diff & BIT_BIG_ENDIAN)
    switch_bit_order(&state->input_stream);
  state->input_stream.endianness ^= diff;

  HParseResult *res = h_do_parse(e->p, state);

  // ...and undo the switch in reverse order.
  state->input_stream.endianness ^= diff;
  if (diff & BIT_BIG_ENDIAN)
    switch_bit_order(&state->input_stream);

  return res;
}
/*
 * Exclusive-or combinator: succeeds iff exactly one of p1 / p2 matches at the
 * current position.
 *
 * Both parsers are always run from the same starting position. When only p2
 * matches, its result is returned and the stream stays where p2 left it. When
 * only p1 matches, the stream is moved back to where p1 stopped and p1's
 * result is returned. When both or neither match, NULL is returned.
 */
static HParseResult* parse_xor(void *env, HParseState *state) {
  HTwoParsers *pair = (HTwoParsers*)env;

  // cache the initial state of the input stream
  HInputStream origin = state->input_stream;
  HParseResult *first = h_do_parse(pair->p1, state);
  HInputStream past_first = state->input_stream;

  // reset input stream, parse again
  state->input_stream = origin;
  HParseResult *second = h_do_parse(pair->p2, state);

  // p1 failed: the outcome is whatever p2 produced (possibly NULL).
  if (first == NULL)
    return second;

  // Both matched: not exclusive, so fail.
  if (second != NULL)
    return NULL;

  // Only p1 matched: continue from where it stopped.
  state->input_stream = past_first;
  return first;
}
/*
 * Repetition combinator driven by an HRepeat environment.
 *
 * Parses env->p repeatedly, requiring env->sep between consecutive elements.
 * With min_p false the loop stops after exactly env->count elements; with
 * min_p true it keeps going until an element (or separator) fails, and
 * env->count acts as a minimum.
 *
 * On a failed iteration the input stream is rewound to the start of that
 * iteration (before its separator). The parse then succeeds if at least
 * env->count elements were collected, and fails otherwise.
 *
 * Returns a TT_SEQUENCE token holding the asts of the matched elements
 * (elements with a NULL ast are counted but not stored), or NULL on failure.
 *
 * NOTE(review): make_result receives the parse state here, while several
 * sibling parsers in this file pass state->arena - confirm against
 * make_result's declaration.
 */
static HParseResult *parse_many(void* env, HParseState *state) {
  HRepeat *rep = (HRepeat*) env;
  HCountedArray *items = h_carray_new_sized(state->arena, (rep->count > 0 ? rep->count : 4));
  size_t matched = 0;
  HInputStream checkpoint;

  while (rep->min_p || rep->count > matched) {
    // Start of this iteration; a failure below rewinds to here.
    checkpoint = state->input_stream;

    // A separator is required before every element except the first.
    HParseResult *elem = NULL;
    if (matched == 0 || h_do_parse(rep->sep, state))
      elem = h_do_parse(rep->p, state);

    if (!elem) {
      state->input_stream = checkpoint;
      if (matched < rep->count)
        return NULL;          // too few elements
      break;                  // enough elements: stop and succeed
    }

    if (elem->ast)
      h_carray_append(items, (void*)elem->ast);
    matched++;
  }

  if (matched < rep->count) {
    state->input_stream = checkpoint;
    return NULL;
  }

  HParsedToken *res = a_new(HParsedToken, 1);
  res->token_type = TT_SEQUENCE;
  res->seq = items;
  return make_result(state, res);
}
/*
 * Sequence combinator: runs every parser in s->p_array in order and collects
 * their tokens.
 *
 * Returns NULL as soon as any parser fails. Otherwise returns a TT_SEQUENCE
 * token containing the non-NULL asts of the sub-results, in order.
 */
static HParseResult* parse_sequence(void *env, HParseState *state) {
  HSequence *s = (HSequence*)env;
  HCountedArray *collected = h_carray_new_sized(state->arena, (s->len > 0) ? s->len : 4);

  size_t idx = 0;
  while (idx < s->len) {
    HParseResult *step = h_do_parse(s->p_array[idx], state);

    // if the interim parse fails, the whole thing fails
    if (step == NULL)
      return NULL;

    // Results without a token contribute nothing to the sequence.
    if (step->ast)
      h_carray_append(collected, (void*)step->ast);

    ++idx;
  }

  HParsedToken *seq_tok = a_new(HParsedToken, 1);
  seq_tok->token_type = TT_SEQUENCE;
  seq_tok->seq = collected;
  return make_result(state->arena, seq_tok);
}
/*
 * Runs `parser` over a caller-supplied input stream, using allocator `mm__`
 * for the arena that backs the parse state.
 *
 * The stream is copied into the parse state, so *input_stream itself is not
 * advanced. After the parse the bookkeeping containers (lr_stack,
 * recursion_heads, cache) are freed. The arena is deleted only when the parse
 * fails.
 *
 * Returns the result of h_do_parse(), or NULL on parse failure.
 */
HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  HArena *arena = h_new_arena(mm__, 0);
  HParseState *ps = a_new_(arena, HParseState, 1);

  ps->cache = h_hashtable_new(arena,
                              cache_key_equal,   // key_equal_func
                              cache_key_hash);   // hash_func
  ps->input_stream = *input_stream;
  ps->lr_stack = h_slist_new(arena);
  ps->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash);
  ps->arena = arena;

  HParseResult *outcome = h_do_parse(parser, ps);

  // tear down the parse state
  h_slist_free(ps->lr_stack);
  h_hashtable_free(ps->recursion_heads);
  h_hashtable_free(ps->cache);

  // A failed parse has no result that could still reference the arena.
  if (outcome == NULL)
    h_delete_arena(ps->arena);

  return outcome;
}
/*
 * Range filter: runs r_env->p and keeps its result only if the integer token
 * lies within [r_env->lower, r_env->upper] (inclusive).
 *
 * TT_SINT tokens are compared directly; for TT_UINT tokens both bounds are
 * cast to uint64_t before comparing. Any other token type is rejected.
 *
 * Returns the wrapped parser's result when in range, NULL otherwise
 * (including when the wrapped parser fails or yields no token).
 */
static HParseResult* parse_int_range(void *env, HParseState *state) {
  HRange *range = (HRange*)env;

  HParseResult *ret = h_do_parse(range->p, state);
  if (!ret || !ret->ast)
    return NULL;

  int in_range;
  switch (ret->ast->token_type) {
  case TT_SINT:
    in_range = range->lower <= ret->ast->sint &&
               range->upper >= ret->ast->sint;
    break;
  case TT_UINT:
    in_range = (uint64_t)range->lower <= ret->ast->uint &&
               (uint64_t)range->upper >= ret->ast->uint;
    break;
  default:
    // Not an integer token: cannot be in any integer range.
    in_range = 0;
    break;
  }

  return in_range ? ret : NULL;
}