/*
 * Compile PATTERN (PATTERN_LEN bytes) with error logging switched off
 * and run a single unanchored match over the whole of TEXT, handing
 * MATCH/NMATCH straight to cre2_match().
 *
 * Return value: 0 for no match, 1 for a successful match, 2 when the
 * options object or the regexp object cannot be built or the regexp
 * reports an error code.
 */
int
cre2_easy_match (const char * pattern, int pattern_len,
		 const char *text, int text_len,
		 cre2_string_t *match, int nmatch)
{
  cre2_options_t *	options;
  cre2_regexp_t *	regexp;
  int			status = 2;	/* pessimistic default: wrong regexp */

  options = cre2_opt_new();
  if (!options)
    return status;
  cre2_opt_set_log_errors(options, 0);

  regexp = cre2_new(pattern, pattern_len, options);
  if (regexp) {
    /* Only attempt the match when the regexp compiled cleanly;
       otherwise the default status of 2 is reported. */
    if (0 == cre2_error_code(regexp)) {
      status = cre2_match(regexp, text, text_len, 0, text_len,
			  CRE2_UNANCHORED, match, nmatch);
    }
    cre2_delete(regexp);
  }
  cre2_opt_delete(options);
  return status;
}
/*
 * u3qe_rexp: match the regular expression in tape `lub` against the
 * text in tape `rad`.
 *
 * The pattern is pre-scanned and refused (u3_nul is returned) when it
 *   - contains a zero byte (tape length differs from the C string length),
 *   - contains a byte above 127,
 *   - uses the \P or \p escapes,
 *   - opens a group with "(?" followed by anything other than ':'.
 *
 * Results:
 *   u3_nul                  pattern refused or failed to compile
 *   [u3_nul u3_nul]         pattern compiled, no match
 *   [u3_nul [u3_nul map]]   match; `map` is keyed by group index
 *                           (0 is the whole match) with the matched
 *                           text of each group as a tape
 *
 * Allocation failures end in u3m_bail(c3__exit).
 */
u3_noun
u3qe_rexp(u3_noun lub, u3_noun rad)
{
  c3_y* lub_y = u3r_tape(lub);
  c3_y* rad_y = u3r_tape(rad);

  /* An extra reference is taken before u3kb_lent(); presumably that
     call consumes its argument -- confirm against the u3 conventions. */
  u3k(lub);
  int lub_l = u3kb_lent(lub);
  if ((size_t)lub_l != strlen((char *)lub_y)) {
    free(lub_y);
    free(rad_y);
    return u3_nul;
  }

  const char* rec = (const char*)lub_y;

  while (*rec != 0) {
    /* Compare as unsigned: with a signed plain `char` the bytes above
       127 are negative and `*rec > 127` could never be true. */
    if ((unsigned char)*rec > 127) {
      free(lub_y);
      free(rad_y);
      return u3_nul;
    }
    else if (*rec == '\\') {
      rec++;
      switch (*rec) {
        case 'P':
        case 'p':
          free(lub_y);
          free(rad_y);
          return u3_nul;

        case 'Q': {
          /* Jump to the closing \E, or to the last byte of the pattern
             when the quoted run is never closed. */
          const char* end = strstr(rec, "\\E");
          if (end == NULL) rec += strlen(rec) - 1;
          else rec = end;
          break;
        }
        default:
          break;
      }
      /* Step over the escaped byte so that it is not re-read as regexp
         syntax (e.g. "\(?" or "\\p"); never step over the terminator. */
      if (*rec != 0) rec++;
    }
    else if (*rec == '(') {
      rec++;
      if (*rec == '?') {
        rec++;
        if (*rec != ':') {
          free(lub_y);
          free(rad_y);
          return u3_nul;
        }
        rec++;
      }
    }
    else rec++;
  }

  cre2_options_t* opt = cre2_opt_new();
  if (!opt) {
    free(lub_y);
    free(rad_y);
    u3m_bail(c3__exit);
    return u3_nul;
  }
  cre2_opt_set_log_errors(opt, 0);
  cre2_opt_set_encoding(opt, CRE2_UTF8);
  cre2_opt_set_perl_classes(opt, 1);
  cre2_opt_set_one_line(opt, 1);
  cre2_opt_set_longest_match(opt, 1);

  cre2_regexp_t* rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt);
  if (!rex) {
    cre2_opt_delete(opt);
    free(lub_y);
    free(rad_y);
    u3m_bail(c3__exit);
    return u3_nul;
  }

  if (cre2_error_code(rex)) {
    // Compiling the regular expression failed
    cre2_opt_delete(opt);
    cre2_delete(rex);
    free(lub_y);
    free(rad_y);
    return u3_nul;
  }

  int text_len = strlen((char *)rad_y);
  int captures = cre2_num_capturing_groups(rex);
  cre2_string_t matches[captures+1];

  int match = cre2_match(rex, (const char*)rad_y, text_len, 0, text_len,
                         CRE2_UNANCHORED, matches, captures+1);

  if (!match) {
    // No matches
    cre2_opt_delete(opt);
    cre2_delete(rex);
    free(lub_y);
    free(rad_y);
    return u3i_cell(u3_nul, u3_nul);
  }

  u3_noun map = u3_nul;

  for (int i = 0; i < captures+1; i++) {
    int   len = matches[i].length;
    char* buf = malloc(len + 1);

    if (!buf) {
      /* Same policy as the other allocation failures in this function.
         NOTE(review): `map` built so far is not released here. */
      cre2_opt_delete(opt);
      cre2_delete(rex);
      free(lub_y);
      free(rad_y);
      u3m_bail(c3__exit);
      return u3_nul;
    }
    /* Copy only when there is something to copy, so memcpy is never
       handed the data pointer of an empty entry. */
    if (len > 0) {
      memcpy(buf, matches[i].data, len);
    }
    buf[len] = 0;
    map = u3kdb_put(map, i, u3i_tape(buf));
    free(buf);
  }

  cre2_opt_delete(opt);
  cre2_delete(rex);
  free(lub_y);
  free(rad_y);
  return u3i_cell(u3_nul, u3i_cell(u3_nul, map));
}
int main (int argc, const char *const argv[]) { { /* quote meta characters */ const char * pattern = "1.5-2.0?"; cre2_string_t original = { .data = pattern, .length = strlen(pattern) }; cre2_string_t quoted; int result; result = cre2_quote_meta("ed, &original); if (0 != result) goto error; if (0 != strncmp("1\\.5\\-2\\.0\\?", quoted.data, quoted.length)) goto error; free((void *)quoted.data); } /* ------------------------------------------------------------------ */ { /* minimum and maximum matching strings */ const char * pattern = "(?i)ABCdef"; cre2_regexp_t * rex; cre2_string_t min, max; int result; rex = cre2_new(pattern, strlen(pattern), NULL); { result = cre2_possible_match_range(rex, &min, &max, 1024); if (1 != result) goto error; if (0 != strncmp("ABCDEF", min.data, min.length)) goto error; if (0 != strncmp("abcdef", max.data, max.length)) goto error; } cre2_delete(rex); free((void *)min.data); free((void *)max.data); } /* ------------------------------------------------------------------ */ { /* successfully check rewrite string */ const char * pattern = "a(b)c"; const char * subst = "def"; cre2_string_t rewrite = { .data = subst, .length = strlen(subst) }; cre2_regexp_t * rex; cre2_string_t errmsg; int result; rex = cre2_new(pattern, strlen(pattern), NULL); { result = cre2_check_rewrite_string(rex, &rewrite, &errmsg); if (1 != result) goto error; } cre2_delete(rex); } { /* failed check rewrite string */ const char * pattern = "a(b)c"; const char * subst = "\\1 \\2"; cre2_string_t rewrite = { .data = subst, .length = strlen(subst) }; cre2_regexp_t * rex; cre2_string_t errmsg; int result; rex = cre2_new(pattern, strlen(pattern), NULL); { result = cre2_check_rewrite_string(rex, &rewrite, &errmsg); if (0 != result) goto error; PRINTF("error message: "); FWRITE(errmsg.data, errmsg.length, 1); PRINTF("\n"); } cre2_delete(rex); free((void *)errmsg.data); } /* ------------------------------------------------------------------ */ exit(EXIT_SUCCESS); error: 
exit(EXIT_FAILURE); }
int main (void) { { /* success, no parentheses */ const char * pattern = "ci.*ut"; const char * text = "pre ciao salut post"; cre2_string_t input = { .data = text, .length = strlen(text) }; int result; result = cre2_partial_match(pattern, &input, NULL, 0); if (! result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } { /* success, one parenthetical subexpression, one match entry */ const char * pattern = "(ciao) salut"; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; result = cre2_partial_match(pattern, &input, match, nmatch); if (! result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; if (0 != strncmp("ciao", match[0].data, match[0].length)) goto error; PRINTF("match 0: "); FWRITE(match[0].data, match[0].length, 1); PRINTF("\n"); } { /* success, two parenthetical subexpressions, two match entries */ const char * pattern = "(ciao) (salut)"; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 2; cre2_string_t match[nmatch]; int result; result = cre2_partial_match(pattern, &input, match, nmatch); if (! 
result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; if (0 != strncmp("ciao", match[0].data, match[0].length)) goto error; if (0 != strncmp("salut", match[1].data, match[1].length)) goto error; PRINTF("match 0: "); FWRITE(match[0].data, match[0].length, 1); PRINTF("\n"); PRINTF("match 1: "); FWRITE(match[1].data, match[1].length, 1); PRINTF("\n"); } { /* failure, no parentheses */ const char * pattern = "ci.*ut"; const char * text = "ciao hello"; cre2_string_t input = { .data = text, .length = strlen(text) }; int result; result = cre2_partial_match(pattern, &input, NULL, 0); if (result) goto error; } { /* failure, one parenthetical subexpression */ const char * pattern = "(ciao) salut"; const char * text = "ciao hello"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; result = cre2_partial_match(pattern, &input, match, nmatch); if (result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } { /* success, one parenthetical subexpression, no match entries */ const char * pattern = "(ciao) salut"; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int result; result = cre2_partial_match(pattern, &input, NULL, 0); if (! 
result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } { /* failure, one parenthetical subexpression, two match entries */ const char * pattern = "(ciao) salut"; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 2; cre2_string_t match[nmatch]; int result; memset(match, '\0', nmatch * sizeof(cre2_string_t)); result = cre2_partial_match(pattern, &input, match, nmatch); if (0 != result) goto error; } { /* success, two parenthetical subexpressions, one match entry */ const char * pattern = "(ciao) (salut)"; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; result = cre2_partial_match(pattern, &input, match, nmatch); if (! result) goto error; if (0 != strncmp("ciao", match[0].data, match[0].length)) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; PRINTF("match 0: "); FWRITE(match[0].data, match[0].length, 1); PRINTF("\n"); } { /* wrong regexp specification */ const char * pattern = "cia(o salut"; const char * text = "ciao hello"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; result = cre2_partial_match(pattern, &input, match, nmatch); if (0 != result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } /* ------------------------------------------------------------------ */ { /* success, no parentheses */ const char * pattern = "ci.*ut"; cre2_regexp_t * rex; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, NULL, 0); cre2_delete(rex); if (! 
result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } { /* success, one parenthetical subexpression, one match entry */ const char * pattern = "(ciao) salut"; cre2_regexp_t * rex; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, match, nmatch); cre2_delete(rex); if (! result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; if (0 != strncmp("ciao", match[0].data, match[0].length)) goto error; PRINTF("match 0: "); FWRITE(match[0].data, match[0].length, 1); PRINTF("\n"); } { /* success, two parenthetical subexpressions, two match entries */ const char * pattern = "(ciao) (salut)"; cre2_regexp_t * rex; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 2; cre2_string_t match[nmatch]; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, match, nmatch); cre2_delete(rex); if (! 
result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; if (0 != strncmp("ciao", match[0].data, match[0].length)) goto error; if (0 != strncmp("salut", match[1].data, match[1].length)) goto error; PRINTF("match 0: "); FWRITE(match[0].data, match[0].length, 1); PRINTF("\n"); PRINTF("match 1: "); FWRITE(match[1].data, match[1].length, 1); PRINTF("\n"); } { /* failure, no parentheses */ const char * pattern = "ci.*ut"; cre2_regexp_t * rex; const char * text = "ciao hello"; cre2_string_t input = { .data = text, .length = strlen(text) }; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, NULL, 0); cre2_delete(rex); if (result) goto error; } { /* failure, one parenthetical subexpression */ const char * pattern = "(ciao) salut"; cre2_regexp_t * rex; const char * text = "ciao hello"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, match, nmatch); cre2_delete(rex); if (result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } { /* success, one parenthetical subexpression, no match entries */ const char * pattern = "(ciao) salut"; cre2_regexp_t * rex; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, NULL, 0); cre2_delete(rex); if (! 
result) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; } { /* failure, one parenthetical subexpression, two match entries */ const char * pattern = "(ciao) salut"; cre2_regexp_t * rex; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 2; cre2_string_t match[nmatch]; int result; memset(match, '\0', nmatch * sizeof(cre2_string_t)); rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, match, nmatch); cre2_delete(rex); if (0 != result) goto error; } { /* success, two parenthetical subexpressions, one match entry */ const char * pattern = "(ciao) (salut)"; cre2_regexp_t * rex; const char * text = "ciao salut"; cre2_string_t input = { .data = text, .length = strlen(text) }; int nmatch = 1; cre2_string_t match[nmatch]; int result; rex = cre2_new(pattern, strlen(pattern), NULL); result = cre2_partial_match_re(rex, &input, match, nmatch); cre2_delete(rex); if (! result) goto error; if (0 != strncmp("ciao", match[0].data, match[0].length)) goto error; if (0 != strncmp(text, input.data, input.length)) goto error; PRINTF("match 0: "); FWRITE(match[0].data, match[0].length, 1); PRINTF("\n"); } exit(EXIT_SUCCESS); error: exit(EXIT_FAILURE); }
int main (void) { cre2_regexp_t * rex; cre2_options_t * opt; const char * pattern; /* ------------------------------------------------------------------ */ /* single match */ pattern = "ciao"; opt = cre2_opt_new(); cre2_opt_set_posix_syntax(opt, 1); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; cre2_string_t match; int nmatch = 1; int e; const char * text = "ciao"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, &match, nmatch); if (1 != e) goto error; PRINTF("match: retval=%d, ", e); FWRITE(match.data, match.length, 1); PRINTF("\n"); } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ /* two groups */ pattern = "(ciao) (hello)"; opt = cre2_opt_new(); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; int nmatch = 3; cre2_string_t strings[nmatch]; cre2_range_t ranges[nmatch]; int e; const char * text = "ciao hello"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch); if (1 != e) goto error; cre2_strings_to_ranges(text, ranges, strings, nmatch); PRINTF("full match: "); FWRITE(text+ranges[0].start, ranges[0].past-ranges[0].start, 1); PRINTF("\n"); PRINTF("first group: "); FWRITE(text+ranges[1].start, ranges[1].past-ranges[1].start, 1); PRINTF("\n"); PRINTF("second group: "); FWRITE(text+ranges[2].start, ranges[2].past-ranges[2].start, 1); PRINTF("\n"); } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ /* test literal option */ pattern = "(ciao) (hello)"; opt = cre2_opt_new(); cre2_opt_set_literal(opt, 1); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; int nmatch = 0; int e; const char * text = "(ciao) (hello)"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, NULL, nmatch); 
if (0 == e) goto error; } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ /* test named groups */ pattern = "from (?P<S>.*) to (?P<D>.*)"; opt = cre2_opt_new(); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; int nmatch = cre2_num_capturing_groups(rex) + 1; cre2_string_t strings[nmatch]; int e, SIndex, DIndex; const char * text = "from Montreal, Canada to Lausanne, Switzerland"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch); if (0 == e) goto error; SIndex = cre2_find_named_capturing_groups(rex, "S"); if (0 != strncmp("Montreal, Canada", strings[SIndex].data, strings[SIndex].length)) goto error; DIndex = cre2_find_named_capturing_groups(rex, "D"); if (0 != strncmp("Lausanne, Switzerland", strings[DIndex].data, strings[DIndex].length)) goto error; } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ exit(EXIT_SUCCESS); error: exit(EXIT_FAILURE); }
u2_noun // produce j2_mbc(Pt5, repg)(u2_wire wir_r, u2_noun lub, u2_noun rad, u2_noun rep) // retain { c3_y* lub_y = u2_cr_tape(lub); c3_y* rad_y = u2_cr_tape(rad); c3_y* rep_y = u2_cr_tape(rep); char* rec = (char*)lub_y; char* end; while(*rec != 0) { if(*rec == '\\') { rec++; switch (*rec) { case 'P': case 'p': free(lub_y); free(rad_y); return u2_nul; case 'Q': end = strstr(rec, "\\E"); if(end == NULL) rec += strlen(rec) - 1; else rec = end; } rec++; } else if(*rec == '(') { rec++; if(*rec == '?') { rec++; if(*rec != ':') { free(lub_y); free(rad_y); return u2_nul; } rec++; } } else rec++; } cre2_regexp_t * rex; cre2_options_t * opt; opt = cre2_opt_new(); if (opt) { cre2_opt_set_log_errors(opt, 0); cre2_opt_set_encoding(opt, CRE2_Latin1); cre2_opt_set_perl_classes(opt, 1); cre2_opt_set_one_line(opt, 1); cre2_opt_set_longest_match(opt, 1); rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt); if (rex) { if (!cre2_error_code(rex)) { int text_len = strlen((char *)rad_y); cre2_string_t matches[1]; int ic = 0; u2_noun ret = u2_nul; while (ic <= text_len) { int match = cre2_match(rex, (const char*)rad_y, text_len, ic, text_len, CRE2_ANCHOR_START, matches, 1); if (!match) { if(rad_y[ic]) ret = u2_cn_cell((u2_atom)rad_y[ic], ret); ic++; } else { int mlen = matches[0].length; if (mlen == 0) { ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *) rad_y+ic)), u2_ckb_flop(u2_ci_tape((char *)rep_y))); ic = text_len + 1; } else { ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *)rep_y)), ret); ic += mlen; } } } cre2_opt_delete(opt); cre2_delete(rex); free(lub_y); free(rad_y); free(rep_y); return u2_cn_cell(u2_nul, u2_ckb_flop(ret)); } else { // Compiling the regular expression failed cre2_opt_delete(opt); cre2_delete(rex); free(lub_y); free(rad_y); return u2_nul; } cre2_opt_delete(opt); cre2_delete(rex); } else { // rex Allocation Error cre2_opt_delete(opt); free(lub_y); free(rad_y); u2_bl_bail(wir_r, c3__exit); } cre2_opt_delete(opt); } // opt Allocation Error 
free(lub_y); free(rad_y); u2_bl_bail(wir_r, c3__exit); return u2_nul; }