/*
 * Construct a finite automaton from PATTERN and return it in *FA.
 *
 * Returns NULL if fa_compile() accepts PATTERN. If fa_compile() reports an
 * error, returns an exception value whose message quotes the escaped
 * pattern and whose additional line carries the regerror() text for the
 * error code.
 *
 * INFO is handed to make_exn_value() as is (no extra reference is taken
 * here).
 *
 * Allocation failures while building the message are still reported with
 * FIXME. FIXME is defined elsewhere; in case it returns, the code below
 * no longer formats a NULL string or lets regerror() write through a NULL
 * buffer: the unescaped pattern is used as a fallback, and the regerror()
 * line is simply omitted.
 */
static struct value *str_to_fa(struct info *info, const char *pattern,
                               struct fa **fa) {
    int error;
    struct value *exn = NULL;
    size_t re_err_len;
    char *re_str = NULL;
    char *re_err = NULL;

    error = fa_compile(pattern, strlen(pattern), fa);
    if (error == REG_NOERROR)
        return NULL;

    re_str = escape(pattern, -1);
    if (re_str == NULL) {
        FIXME("Out of memory");
    }
    /* Fall back to the raw pattern rather than passing NULL to %s */
    exn = make_exn_value(info, "Invalid regular expression /%s/",
                         re_str != NULL ? re_str : pattern);

    /* A first call with a zero-sized buffer yields the required size */
    re_err_len = regerror(error, NULL, NULL, 0);
    if (ALLOC_N(re_err, re_err_len) < 0) {
        FIXME("Out of memory");
    } else {
        regerror(error, NULL, re_err, re_err_len);
        exn_printf_line(exn, "%s", re_err);
    }

    free(re_str);
    free(re_err);
    return exn;
}
/* * Typechecking of lenses */ static struct value *disjoint_check(struct info *info, const char *msg, struct regexp *r1, struct regexp *r2) { struct fa *fa1 = NULL; struct fa *fa2 = NULL; struct fa *fa = NULL; struct value *exn = NULL; exn = regexp_to_fa(r1, &fa1); if (exn != NULL) goto done; exn = regexp_to_fa(r2, &fa2); if (exn != NULL) goto done; fa = fa_intersect(fa1, fa2); if (! fa_is_basic(fa, FA_EMPTY)) { size_t xmpl_len; char *xmpl; fa_example(fa, &xmpl, &xmpl_len); exn = make_exn_value(ref(info), "overlapping lenses in %s", msg); exn_printf_line(exn, "Example matched by both: '%s'", xmpl); free(xmpl); } done: fa_free(fa); fa_free(fa1); fa_free(fa2); return exn; }
/*
 * Ask fa_ambig_example() for a word that shows an ambiguity between FA1
 * and FA2. Returns NULL if no such word is reported (the word pointer
 * comes back NULL); otherwise returns an exception carrying MSG and a
 * rendering of the two ways the word can be split.
 *
 * The two extra pointers filled in by fa_ambig_example() are treated as
 * positions inside the word: only the word itself is freed, and the
 * pointer differences are used as prefix lengths.
 */
static struct value *ambig_check(struct info *info,
                                 struct fa *fa1, struct fa *fa2,
                                 const char *msg) {
    char *word, *tail_pv, *tail_v;
    size_t word_len;

    fa_ambig_example(fa1, fa2, &word, &word_len, &tail_pv, &tail_v);
    if (word == NULL)
        return NULL;

    /* Escaped pieces: the two prefixes, the whole word, the two suffixes */
    char *esc_u = escape(word, tail_pv - word);
    char *esc_up = escape(word, tail_v - word);
    char *esc_word = escape(word, -1);
    char *esc_pv = escape(tail_pv, -1);
    char *esc_v = escape(tail_v, -1);

    struct value *exn = make_exn_value(ref(info), "%s", msg);
    exn_printf_line(exn, " '%s' can be split into", esc_word);
    exn_printf_line(exn, " '%s|=|%s'\n", esc_u, esc_pv);
    exn_printf_line(exn, " and");
    exn_printf_line(exn, " '%s|=|%s'\n", esc_up, esc_v);

    free(esc_u);
    free(esc_up);
    free(esc_word);
    free(esc_pv);
    free(esc_v);
    free(word);
    return exn;
}
static struct value *make_exn_lns_error(struct info *info, struct lns_error *err, const char *text) { struct value *v; if (HAS_ERR(info)) return exn_error(); v = make_exn_value(ref(info), "%s", err->message); if (err->lens != NULL) { char *s = format_info(err->lens->info); exn_printf_line(v, "Lens: %s", s); free(s); } if (err->pos >= 0) { char *pos = format_pos(text, err->pos); size_t line, ofs; calc_line_ofs(text, err->pos, &line, &ofs); exn_printf_line(v, "Error encountered at %d:%d (%d characters into string)", (int) line, (int) ofs, err->pos); if (pos != NULL) exn_printf_line(v, "%s", pos); free(pos); } else { exn_printf_line(v, "Error encountered at path %s", err->path); } return v; }
/* V_STRING -> V_TREE -> V_TREE */ static struct value *tree_rm_glue(struct info *info, struct value *path, struct value *tree) { // FIXME: This only works if TREE is not referenced more than once; // otherwise we'll have some pretty weird semantics, and would really // need to copy TREE first assert(path->tag == V_STRING); assert(tree->tag == V_TREE); struct pathx *p = NULL; struct value *result = NULL; if (pathx_parse(tree->origin, NULL, path->string->str, true, NULL, &p) != PATHX_NOERROR) { result = make_pathx_exn(ref(info), p); goto done; } if (tree_rm(p) == -1) { result = make_exn_value(ref(info), "Tree rm of %s failed", path->string->str); goto done; } result = ref(tree); done: free_pathx(p); return result; }
/* V_REGEXP -> V_STRING -> V_STRING */ static struct value *rx_match(struct info *info, struct value *rx, struct value *s) { struct value *result = NULL; const char *str = s->string->str; struct re_registers regs; int r; MEMZERO(®s, 1); r = regexp_match(rx->regexp, str, strlen(str), 0, ®s); if (r < -1) { result = make_exn_value(ref(info), "regexp match failed (internal error)"); } else { char *match = NULL; if (r == -1) { /* No match */ match = strdup(""); } else { match = strndup(str + regs.start[0], regs.end[0] - regs.start[0]); } if (match == NULL) { result = info->error->exn; } else { result = make_value(V_STRING, ref(info)); result->string = make_string(match); } } return result; }
static struct value *tree_insert_glue(struct info *info, struct value *label, struct value *path, struct value *tree, int before) { // FIXME: This only works if TREE is not referenced more than once; // otherwise we'll have some pretty weird semantics, and would really // need to copy TREE first assert(label->tag == V_STRING); assert(path->tag == V_STRING); assert(tree->tag == V_TREE); int r; struct pathx *p = NULL; struct value *result = NULL; result = pathx_parse_glue(info, tree, path, &p); if (result != NULL) goto done; r = tree_insert(p, label->string->str, before); if (r != 0) { result = make_exn_value(ref(info), "Tree insert of %s at %s failed", label->string->str, path->string->str); goto done; } result = ref(tree); done: free_pathx(p); return result; }
struct value *lns_make_star(struct info *info, struct lens *l, int check) { struct lens *lens; if (check) { struct value *exn = typecheck_iter(info, l); if (exn != NULL) { return exn; } } if (l->value) { return make_exn_value(info, "Multiple stores in iteration"); } if (l->key) { return make_exn_value(info, "Multiple keys/labels in iteration"); } lens = make_lens_unop(L_STAR, info, l); lens->ctype = regexp_iter(info, l->ctype, 0, -1); lens->atype = regexp_iter(info, l->atype, 0, -1); return make_lens_value(lens); }
struct value *lns_make_concat(struct info *info, struct lens *l1, struct lens *l2, int check) { struct lens *lens = NULL; if (check) { struct value *exn = typecheck_concat(info, l1, l2); if (exn != NULL) { return exn; } } if (l1->value && l2->value) { return make_exn_value(info, "Multiple stores in concat"); } if (l1->key && l2->key) { return make_exn_value(info, "Multiple keys/labels in concat"); } lens = make_lens_binop(L_CONCAT, info, l1, l2, regexp_concat_n); lens->consumes_value = l1->consumes_value || l2->consumes_value; return make_lens_value(lens); }
/* V_LENS -> V_FILTER -> V_TRANSFORM */ static struct value *xform_transform(struct info *info, struct value *l, struct value *f) { assert(l->tag == V_LENS); assert(f->tag == V_FILTER); if (l->lens->value || l->lens->key) { return make_exn_value(ref(info), "Can not build a transform " "from a lens that leaves a %s behind", l->lens->key ? "key" : "value"); } struct value *v = make_value(V_TRANSFORM, ref(info)); v->transform = make_transform(ref(l->lens), ref(f->filter)); return v; }
/*
 * Typecheck the optional lens (L)?, i.e. (<e>|L) where <e> is the empty
 * language. Returns NULL if the check passes and an exception otherwise.
 */
static struct value *typecheck_maybe(struct info *info, struct lens *l) {
    /* Get direction: L must not match the empty word itself */
    if (regexp_matches_empty(l->ctype)) {
        return make_exn_value(ref(info),
                "illegal optional expression: /%s/ matches the empty word",
                l->ctype->pattern->str);
    }

    /* Put direction; the check passes if
       (1) the atype does not match the empty string, because we can tell
           from looking at tree nodes whether L should be applied or not
       (2) L handles a value; with that, we know whether to apply L or not
           depending on whether the current node has a non NULL value or
           not
    */
    if (! l->consumes_value && regexp_matches_empty(l->atype)) {
        return make_exn_value(ref(info),
               "optional expression matches the empty tree but does not consume a value");
    }

    return NULL;
}
/*
 * Read the file named by the string value N with xread_file() and return
 * its contents as a new V_STRING value. If xread_file() returns NULL, an
 * exception is returned whose extra line is the xstrerror() text for the
 * current errno.
 */
static struct value *sys_read_file(struct info *info, struct value *n) {
    assert(n->tag == V_STRING);

    char *contents = xread_file(n->string->str);

    if (contents != NULL) {
        struct value *v = make_value(V_STRING, ref(info));
        v->string = make_string(contents);
        return v;
    }

    /* Look at errno first, before any other call can overwrite it */
    char error_buf[1024];
    const char *errmsg = xstrerror(errno, error_buf, sizeof(error_buf));

    struct value *exn = make_exn_value(ref(info),
                                       "reading file %s failed:",
                                       n->string->str);
    exn_printf_line(exn, "%s", errmsg);
    return exn;
}
/*
 * Build an exception value describing the parse error recorded in P.
 *
 * The message is "syntax error in path expression: <error>". When memory
 * allows, one more line is added that shows the path expression with the
 * marker "|=|" inserted at the error position.
 *
 * INFO: callers in this file pass an already counted reference
 * (make_pathx_exn(ref(info), p)), so it is forwarded to make_exn_value()
 * as is. Taking another reference here would leave one that is never
 * dropped.
 */
static struct value *make_pathx_exn(struct info *info, struct pathx *p) {
    struct value *v;
    char *marked = NULL;
    const char *txt = NULL;
    const char *px_err;
    int pos = 0;

    /* The error text is only formatted into the message, so it does not
       need to be duplicated (the copy used to be leaked) */
    px_err = pathx_error(p, &txt, &pos);
    v = make_exn_value(info, "syntax error in path expression: %s", px_err);

    if (txt != NULL) {
        size_t len = strlen(txt);
        /* Keep the split point inside TXT so that none of the copies
           below can run past either buffer */
        size_t split = (pos < 0) ? 0 : (size_t) pos;
        if (split > len)
            split = len;

        /* Room for TXT, the three marker characters and the final NUL */
        if (ALLOC_N(marked, len + 4) >= 0) {
            memcpy(marked, txt, split);
            memcpy(marked + split, "|=|", 3);
            memcpy(marked + split + 3, txt + split, len - split + 1);
            /* MARKED is not freed here; as before, it is left with the
               exception via exn_add_lines() */
            exn_add_lines(v, 1, marked);
        }
    }
    return v;
}
/* V_STRING -> V_STRING -> V_TREE -> V_TREE */ static struct value *tree_set_glue(struct info *info, struct value *path, struct value *val, struct value *tree) { // FIXME: This only works if TREE is not referenced more than once; // otherwise we'll have some pretty weird semantics, and would really // need to copy TREE first assert(path->tag == V_STRING); assert(val->tag == V_STRING); assert(tree->tag == V_TREE); struct tree *fake = NULL; struct pathx *p = NULL; struct value *result = NULL; if (tree->origin->children == NULL) { tree->origin->children = make_tree(NULL, NULL, tree->origin, NULL); fake = tree->origin->children; } if (pathx_parse(tree->origin, NULL, path->string->str, true, NULL, &p) != PATHX_NOERROR) { result = make_pathx_exn(ref(info), p); goto done; } if (tree_set(p, val->string->str) == NULL) { result = make_exn_value(ref(info), "Tree set of %s to '%s' failed", path->string->str, val->string->str); goto done; } if (fake != NULL) { list_remove(fake, tree->origin->children); free_tree(fake); } result = ref(tree); done: free_pathx(p); return result; }
/* Calculate the regexp that matches the labels if the trees that L can generate. We have some headache here because of the behavior of STORE: since STORE creates a tree with no label (a leaf, really), its key regexp should be "/", but only of there is no KEY or LABEL statement that fills in the label of the tree that STORE created. */ static struct regexp *lns_key_regexp(struct lens *l, struct value **exn) { static const struct string digits_string = { .ref = REF_MAX, .str = (char *) "[0-9]+/" }; static const struct string *const digits_pat = &digits_string; *exn = NULL; switch(l->tag) { case L_STORE: case L_DEL: case L_COUNTER: return NULL; case L_SEQ: return make_regexp_from_string(l->info, (struct string *) digits_pat); case L_KEY: return make_key_regexp(l->info, l->regexp->pattern->str); case L_LABEL: { struct regexp *r = make_regexp_literal(l->info, l->string->str); if (r == NULL) return NULL; if (REALLOC_N(r->pattern->str, strlen(r->pattern->str) + 2) == -1) { unref(r, regexp); return NULL; } strcat(r->pattern->str, "/"); return r; } case L_CONCAT: { struct regexp *k = NULL; for (int i=0; i < l->nchildren; i++) { struct regexp *r = lns_key_regexp(l->children[i], exn); if (*exn != NULL) { free_regexp(k); return NULL; } if (r != NULL) { if (k != NULL) { *exn = make_exn_value(ref(l->info), "More than one key"); unref(r, regexp); unref(k, regexp); return NULL; } else { k = r; } } } return k; } break; case L_UNION: { struct regexp *k = NULL; for (int i=0; i < l->nchildren; i++) { struct regexp *r = lns_key_regexp(l->children[i], exn); if (*exn != NULL) return NULL; if (r != NULL) { if (k == NULL) { k = r; } else { struct regexp *u = regexp_union(l->info, k, r); unref(k, regexp); unref(r, regexp); k = u; } } } return k; } break; case L_SUBTREE: return NULL; break; case L_STAR: case L_MAYBE: return lns_key_regexp(l->child, exn); default: assert(0); } return NULL; } void free_lens(struct lens *lens) { if (lens == NULL) return; assert(lens->ref == 0); 
unref(lens->info, info); unref(lens->ctype, regexp); unref(lens->atype, regexp); switch (lens->tag) { case L_DEL: unref(lens->regexp, regexp); unref(lens->string, string); break; case L_STORE: case L_KEY: unref(lens->regexp, regexp); break; case L_LABEL: case L_SEQ: case L_COUNTER: unref(lens->string, string); break; case L_SUBTREE: case L_STAR: case L_MAYBE: unref(lens->child, lens); break; case L_CONCAT: case L_UNION: for (int i=0; i < lens->nchildren; i++) unref(lens->children[i], lens); free(lens->children); break; default: assert(0); break; } free(lens); } void lens_release(struct lens *lens) { if (lens == NULL) return; regexp_release(lens->ctype); regexp_release(lens->atype); if (lens->tag == L_KEY || lens->tag == L_STORE) regexp_release(lens->regexp); if (lens->tag == L_SUBTREE || lens->tag == L_STAR || lens->tag == L_MAYBE) { lens_release(lens->child); } if (lens->tag == L_UNION || lens->tag == L_CONCAT) { for (int i=0; i < lens->nchildren; i++) { lens_release(lens->children[i]); } } }
/* * Lens primitives */ struct value *lns_make_prim(enum lens_tag tag, struct info *info, struct regexp *regexp, struct string *string) { struct lens *lens = NULL; struct value *exn = NULL; struct fa *fa_slash = NULL; struct fa *fa_key = NULL; struct fa *fa_isect = NULL; /* Typecheck */ if (tag == L_KEY) { exn = str_to_fa(info, "(.|\n)*/(.|\n)*", &fa_slash); if (exn != NULL) goto error; exn = regexp_to_fa(regexp, &fa_key); if (exn != NULL) goto error; fa_isect = fa_intersect(fa_slash, fa_key); if (! fa_is_basic(fa_isect, FA_EMPTY)) { exn = make_exn_value(info, "The key regexp /%s/ matches a '/'", regexp->pattern->str); goto error; } fa_free(fa_isect); fa_free(fa_key); fa_free(fa_slash); fa_isect = fa_key = fa_slash = NULL; } else if (tag == L_LABEL) { if (strchr(string->str, SEP) != NULL) { exn = make_exn_value(info, "The label string \"%s\" contains a '/'", string->str); goto error; } } else if (tag == L_DEL) { int cnt; const char *dflt = string->str; cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL); if (cnt != strlen(dflt)) { char *s = escape(dflt, -1); char *r = escape(regexp->pattern->str, -1); exn = make_exn_value(info, "del: the default value '%s' does not match /%s/", s, r); FREE(s); FREE(r); goto error; } } /* Build the actual lens */ lens = make_lens(tag, info); lens->regexp = regexp; lens->string = string; lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ); lens->value = (tag == L_STORE); lens->consumes_value = (tag == L_STORE); lens->atype = regexp_make_empty(info); if (tag == L_DEL || tag == L_STORE || tag == L_KEY) { lens->ctype = ref(regexp); } else if (tag == L_LABEL || tag == L_SEQ || tag == L_COUNTER) { lens->ctype = regexp_make_empty(info); } else { assert(0); } return make_lens_value(lens); error: fa_free(fa_isect); fa_free(fa_key); fa_free(fa_slash); return exn; }