/* Compile PATTERN with a throw-away options object (error logging
   disabled) and run a single unanchored match over the whole of TEXT.
   Up to NMATCH substrings are stored by cre2_match() in the array
   referenced by MATCH.

   Return 0 for no match, 1 for a successful match, 2 for a wrong
   regexp; 2 is also returned when the options object or the regexp
   object cannot be built. */
int
cre2_easy_match (const char * pattern, int pattern_len,
		 const char * text, int text_len,
		 cre2_string_t * match, int nmatch)
{
  cre2_options_t *	options;
  cre2_regexp_t *	regex;
  int			result = 2;

  options = cre2_opt_new();
  if (!options)
    return result;
  cre2_opt_set_log_errors(options, 0);

  regex = cre2_new(pattern, pattern_len, options);
  if (regex) {
    /* A regexp object can exist and still carry a compilation error;
       in that case RESULT keeps its "wrong regexp" value. */
    if (0 == cre2_error_code(regex))
      result = cre2_match(regex, text, text_len, 0, text_len,
			  CRE2_UNANCHORED, match, nmatch);
    cre2_delete(regex);
  }
  cre2_opt_delete(options);
  return result;
}
/* u3qe_rexp(): match the regular expression tape `lub` against the
** tape `rad`.
**
**   Produces:
**     u3_nul                   the pattern was rejected by the pre-scan
**                              below, or failed to compile;
**     [u3_nul u3_nul]          the pattern compiled but did not match;
**     [u3_nul u3_nul map]      the pattern matched; `map` associates each
**                              capture index (0 is the whole match) with
**                              the captured text as a tape.
**
**   Bails with c3__exit when a cre2 object or a scratch buffer cannot
**   be allocated.
**
**   The pre-scan rejects: patterns whose C string length differs from
**   the tape length, bytes above 127, the escapes \p and \P, and any
**   `(?` group that is not `(?:`.
*/
u3_noun
u3qe_rexp(u3_noun lub, u3_noun rad)
{
  c3_y*           lub_y = u3r_tape(lub);
  c3_y*           rad_y = u3r_tape(rad);
  cre2_options_t* opt   = NULL;
  cre2_regexp_t*  rex   = NULL;
  u3_noun         pro   = u3_nul;     //  product when not bailing
  int             bal_i = 0;          //  nonzero: bail after cleanup
  const char*     rec   = (const char*)lub_y;

  //  NOTE(review): the extra reference is taken because u3kb_lent()
  //  presumably consumes its argument -- confirm.
  //
  u3k(lub);
  int lub_l = u3kb_lent(lub);

  if ( (size_t)lub_l != strlen((char*)lub_y) ) {
    goto done;
  }

  //  Pre-scan the pattern for constructs we refuse to hand to the
  //  regex engine.
  //
  while ( *rec != 0 ) {
    //  Compare as unsigned: where plain char is signed, a byte above
    //  127 is negative and `*rec > 127` could never be true.
    //
    if ( (unsigned char)*rec > 127 ) {
      goto done;
    }
    else if ( *rec == '\\' ) {
      rec++;
      switch ( *rec ) {
        case 'P':
        case 'p':
          goto done;

        case 'Q': {
          //  Quoted section: jump to its closing \E, or to the last
          //  character of the pattern when it is never closed.
          //
          const char* end = strstr(rec, "\\E");

          rec = (end == NULL) ? rec + strlen(rec) - 1 : end;
          break;
        }
        default:
          break;
      }
      //  Skip the escaped character so it is not re-scanned as syntax
      //  (otherwise `\(?` or `\\P` would be wrongly rejected).  Never
      //  step over the terminating NUL.
      //
      if ( *rec != 0 ) {
        rec++;
      }
    }
    else if ( *rec == '(' ) {
      rec++;
      if ( *rec == '?' ) {
        rec++;
        if ( *rec != ':' ) {
          goto done;
        }
        rec++;
      }
    }
    else {
      rec++;
    }
  }

  opt = cre2_opt_new();
  if ( !opt ) {
    bal_i = 1;
    goto done;
  }
  cre2_opt_set_log_errors(opt, 0);
  cre2_opt_set_encoding(opt, CRE2_UTF8);
  cre2_opt_set_perl_classes(opt, 1);
  cre2_opt_set_one_line(opt, 1);
  cre2_opt_set_longest_match(opt, 1);

  rex = cre2_new((const char*)lub_y, strlen((char*)lub_y), opt);
  if ( !rex ) {
    bal_i = 1;
    goto done;
  }
  if ( cre2_error_code(rex) ) {
    //  Compiling the regular expression failed.
    //
    goto done;
  }

  {
    int text_len = strlen((char*)rad_y);
    int captures = cre2_num_capturing_groups(rex);

    //  A negative count would make the array below invalid; treat it
    //  like a pattern that failed to compile.
    //
    if ( captures < 0 ) {
      goto done;
    }

    cre2_string_t matches[captures + 1];
    int           match = cre2_match(rex, (const char*)rad_y, text_len,
                                     0, text_len, CRE2_UNANCHORED,
                                     matches, captures + 1);

    if ( !match ) {
      //  No matches.
      //
      pro = u3i_cell(u3_nul, u3_nul);
      goto done;
    }

    u3_noun map = u3_nul;

    for ( int i = 0; i <= captures; i++ ) {
      int   len = matches[i].length;
      char* buf = malloc((size_t)len + 1);

      if ( !buf ) {
        //  NOTE(review): `map` is not released on this path; presumably
        //  the bail unwinds noun allocations -- confirm.
        //
        bal_i = 1;
        goto done;
      }
      //  The data pointer is only dereferenced when there is something
      //  to copy; an empty capture becomes an empty tape.
      //
      if ( len > 0 ) {
        memcpy(buf, matches[i].data, (size_t)len);
      }
      buf[len] = 0;
      map = u3kdb_put(map, i, u3i_tape(buf));
      free(buf);
    }
    pro = u3i_cell(u3_nul, u3i_cell(u3_nul, map));
  }

done:
  if ( rex ) {
    cre2_delete(rex);
  }
  if ( opt ) {
    cre2_opt_delete(opt);
  }
  free(lub_y);
  free(rad_y);

  if ( bal_i ) {
    u3m_bail(c3__exit);
    return u3_nul;
  }
  return pro;
}
int main (void) { cre2_regexp_t * rex; cre2_options_t * opt; const char * pattern; /* ------------------------------------------------------------------ */ /* single match */ pattern = "ciao"; opt = cre2_opt_new(); cre2_opt_set_posix_syntax(opt, 1); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; cre2_string_t match; int nmatch = 1; int e; const char * text = "ciao"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, &match, nmatch); if (1 != e) goto error; PRINTF("match: retval=%d, ", e); FWRITE(match.data, match.length, 1); PRINTF("\n"); } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ /* two groups */ pattern = "(ciao) (hello)"; opt = cre2_opt_new(); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; int nmatch = 3; cre2_string_t strings[nmatch]; cre2_range_t ranges[nmatch]; int e; const char * text = "ciao hello"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch); if (1 != e) goto error; cre2_strings_to_ranges(text, ranges, strings, nmatch); PRINTF("full match: "); FWRITE(text+ranges[0].start, ranges[0].past-ranges[0].start, 1); PRINTF("\n"); PRINTF("first group: "); FWRITE(text+ranges[1].start, ranges[1].past-ranges[1].start, 1); PRINTF("\n"); PRINTF("second group: "); FWRITE(text+ranges[2].start, ranges[2].past-ranges[2].start, 1); PRINTF("\n"); } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ /* test literal option */ pattern = "(ciao) (hello)"; opt = cre2_opt_new(); cre2_opt_set_literal(opt, 1); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; int nmatch = 0; int e; const char * text = "(ciao) (hello)"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, NULL, nmatch); 
if (0 == e) goto error; } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ /* test named groups */ pattern = "from (?P<S>.*) to (?P<D>.*)"; opt = cre2_opt_new(); rex = cre2_new(pattern, strlen(pattern), opt); { if (cre2_error_code(rex)) goto error; int nmatch = cre2_num_capturing_groups(rex) + 1; cre2_string_t strings[nmatch]; int e, SIndex, DIndex; const char * text = "from Montreal, Canada to Lausanne, Switzerland"; int text_len = strlen(text); e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch); if (0 == e) goto error; SIndex = cre2_find_named_capturing_groups(rex, "S"); if (0 != strncmp("Montreal, Canada", strings[SIndex].data, strings[SIndex].length)) goto error; DIndex = cre2_find_named_capturing_groups(rex, "D"); if (0 != strncmp("Lausanne, Switzerland", strings[DIndex].data, strings[DIndex].length)) goto error; } cre2_delete(rex); cre2_opt_delete(opt); /* ------------------------------------------------------------------ */ exit(EXIT_SUCCESS); error: exit(EXIT_FAILURE); }
u2_noun // produce j2_mbc(Pt5, repg)(u2_wire wir_r, u2_noun lub, u2_noun rad, u2_noun rep) // retain { c3_y* lub_y = u2_cr_tape(lub); c3_y* rad_y = u2_cr_tape(rad); c3_y* rep_y = u2_cr_tape(rep); char* rec = (char*)lub_y; char* end; while(*rec != 0) { if(*rec == '\\') { rec++; switch (*rec) { case 'P': case 'p': free(lub_y); free(rad_y); return u2_nul; case 'Q': end = strstr(rec, "\\E"); if(end == NULL) rec += strlen(rec) - 1; else rec = end; } rec++; } else if(*rec == '(') { rec++; if(*rec == '?') { rec++; if(*rec != ':') { free(lub_y); free(rad_y); return u2_nul; } rec++; } } else rec++; } cre2_regexp_t * rex; cre2_options_t * opt; opt = cre2_opt_new(); if (opt) { cre2_opt_set_log_errors(opt, 0); cre2_opt_set_encoding(opt, CRE2_Latin1); cre2_opt_set_perl_classes(opt, 1); cre2_opt_set_one_line(opt, 1); cre2_opt_set_longest_match(opt, 1); rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt); if (rex) { if (!cre2_error_code(rex)) { int text_len = strlen((char *)rad_y); cre2_string_t matches[1]; int ic = 0; u2_noun ret = u2_nul; while (ic <= text_len) { int match = cre2_match(rex, (const char*)rad_y, text_len, ic, text_len, CRE2_ANCHOR_START, matches, 1); if (!match) { if(rad_y[ic]) ret = u2_cn_cell((u2_atom)rad_y[ic], ret); ic++; } else { int mlen = matches[0].length; if (mlen == 0) { ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *) rad_y+ic)), u2_ckb_flop(u2_ci_tape((char *)rep_y))); ic = text_len + 1; } else { ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *)rep_y)), ret); ic += mlen; } } } cre2_opt_delete(opt); cre2_delete(rex); free(lub_y); free(rad_y); free(rep_y); return u2_cn_cell(u2_nul, u2_ckb_flop(ret)); } else { // Compiling the regular expression failed cre2_opt_delete(opt); cre2_delete(rex); free(lub_y); free(rad_y); return u2_nul; } cre2_opt_delete(opt); cre2_delete(rex); } else { // rex Allocation Error cre2_opt_delete(opt); free(lub_y); free(rad_y); u2_bl_bail(wir_r, c3__exit); } cre2_opt_delete(opt); } // opt Allocation Error 
free(lub_y); free(rad_y); u2_bl_bail(wir_r, c3__exit); return u2_nul; }