Ejemplo n.º 1
0
/* Compile PATTERN with error logging disabled and run one unanchored
   match over the whole of TEXT, storing up to NMATCH entries in MATCH.

   Return value: 0 for no match, 1 for a successful match, 2 when the
   regexp is wrong.  2 is also returned when the options object or the
   regexp object cannot be created.  Both temporary objects are released
   before returning. */
int
cre2_easy_match (const char * pattern, int pattern_len,
		 const char *text, int text_len,
		 cre2_string_t *match, int nmatch)
{
  cre2_options_t *options = cre2_opt_new();
  if (!options)
    return 2;
  cre2_opt_set_log_errors(options, 0);

  cre2_regexp_t *regexp = cre2_new(pattern, pattern_len, options);
  if (!regexp) {
    cre2_opt_delete(options);
    return 2;
  }

  /* Assume a broken regexp until the error code says otherwise. */
  int status = 2;
  if (0 == cre2_error_code(regexp))
    status = cre2_match(regexp, text, text_len, 0, text_len,
			CRE2_UNANCHORED, match, nmatch);

  cre2_delete(regexp);
  cre2_opt_delete(options);
  return status;
}
Ejemplo n.º 2
0
Archivo: rexp.c Proyecto: dcow/urbit
  /* u3qe_rexp(): match the regular expression `lub` against the text `rad`.
  **
  **   Both arguments are turned into C strings with u3r_tape().  Before it
  **   is compiled, the pattern is screened and rejected (u3_nul) when it:
  **     - has a list length that differs from its C-string length,
  **     - contains a byte above 127,
  **     - uses \P or \p,
  **     - opens a group with "(?" that is not "(?:".
  **   Text between \Q and \E is skipped by the screening loop.
  **
  **   Produces:
  **     u3_nul                  pattern rejected, or it failed to compile
  **     [u3_nul u3_nul]         pattern compiled but did not match
  **     [u3_nul [u3_nul map]]   match; `map` is built with u3kdb_put() and
  **                             keyed by capture index (entry 0 is the
  **                             first element filled in by cre2_match)
  **
  **   Bails with c3__exit when the options object, the regexp object or a
  **   scratch buffer cannot be allocated.
  */
  u3_noun
  u3qe_rexp(u3_noun lub, u3_noun rad)
  {
    c3_y* lub_y = u3r_tape(lub);
    c3_y* rad_y = u3r_tape(rad);

    u3k(lub);
    int lub_l = u3kb_lent(lub);
    // NOTE(review): a mismatch presumably means the tape holds an embedded
    // zero byte, which would truncate the C string -- confirm.
    if ((size_t)lub_l != strlen((char *)lub_y)) {
      free(lub_y);
      free(rad_y);
      return u3_nul;
    }

    char* rec = (char*)lub_y;
    char* end;
    while(*rec != 0) {
      // Compare as unsigned: plain char may be signed, and then
      // `*rec > 127` can never be true, letting non-ASCII bytes through.
      if((unsigned char)*rec > 127) {
        free(lub_y);
        free(rad_y);
        return u3_nul;
      }
      else if(*rec == '\\') {
        rec++;
        // NOTE(review): unless \Q moves it, rec is left on the escaped
        // character, which the next iteration scans as an ordinary one
        // (so "\(?" is treated like a group opener) -- confirm intended.
        switch (*rec) {
        case 'P':
        case 'p':
          free(lub_y);
          free(rad_y);
          return u3_nul;
        case 'Q':
          // Jump to the closing \E, or to the last character if there is none.
          end = strstr(rec, "\\E");
          if(end == NULL) rec += strlen(rec) - 1;
          else rec = end;
          break;
        default:
          break;
        }
      }
      else if(*rec == '(') {
        rec++;
        if(*rec == '?') {
          rec++;
          // Only the "(?:" form is accepted after "(?".
          if(*rec != ':') {
            free(lub_y);
            free(rad_y);
            return u3_nul;
          }
          rec++;
        }
      }
      else
        rec++;
    }

    cre2_regexp_t * rex;
    cre2_options_t * opt;

    opt = cre2_opt_new();
    if (opt) {
      cre2_opt_set_log_errors(opt, 0);
      cre2_opt_set_encoding(opt, CRE2_UTF8);
      cre2_opt_set_perl_classes(opt, 1);
      cre2_opt_set_one_line(opt, 1);
      cre2_opt_set_longest_match(opt, 1);
      rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt);
      if (rex) {
        if (!cre2_error_code(rex)) {
          int text_len = strlen((char *)rad_y);
          int captures = cre2_num_capturing_groups(rex);
          // One slot per capture group plus one leading slot.
          cre2_string_t matches[captures+1];

          int match = cre2_match(rex, (const char*)rad_y, text_len, 0, text_len, CRE2_UNANCHORED, matches, captures+1);

          if (!match) {
            // No matches
            cre2_opt_delete(opt);
            cre2_delete(rex);
            free(lub_y);
            free(rad_y);
            return u3i_cell(u3_nul, u3_nul);
          }

          u3_noun map = u3_nul;

          int i;
          for (i = 0; i < captures+1; i++) {
            // u3i_tape() is handed a NUL-terminated copy of the match text.
            char * buf = malloc(matches[i].length + 1);
            if (!buf) {
              // Out of memory: release what this function owns and bail.
              // NOTE(review): `map` is not released here -- confirm that
              // u3m_bail() makes that unnecessary.
              cre2_opt_delete(opt);
              cre2_delete(rex);
              free(lub_y);
              free(rad_y);
              u3m_bail(c3__exit);
              return u3_nul;
            }
            // Skip the copy for empty entries so memcpy() is never handed
            // a possibly-NULL source pointer.
            if (matches[i].length > 0) {
              memcpy(buf, matches[i].data, matches[i].length);
            }
            buf[matches[i].length] = 0;
            map = u3kdb_put(map, i, u3i_tape(buf));
            free(buf);
          }

          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          return u3i_cell(u3_nul, u3i_cell(u3_nul, map));

        }
        else {
          // Compiling the regular expression failed
          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          return u3_nul;
        }
      }
      // rex could not be allocated
      cre2_opt_delete(opt);
    }
    // opt or rex could not be allocated
    free(lub_y);
    free(rad_y);
    u3m_bail(c3__exit);
    return u3_nul;
  }
Ejemplo n.º 3
0
/* Test program for three CRE2 helpers: cre2_quote_meta(),
   cre2_possible_match_range() and cre2_check_rewrite_string().

   Each braced section is an independent check.  Any unexpected result,
   including a regexp object that could not be allocated, jumps to the
   `error' label and the process exits with EXIT_FAILURE; otherwise it
   exits with EXIT_SUCCESS.  argc and argv are not used.

   NOTE(review): the strncmp() calls are bounded by the length of the
   returned string, so they only verify that the result is a prefix of
   the expected text. */
int
main (int argc, const char *const argv[])
{
  { /* quote meta characters */
    const char *	pattern	 = "1.5-2.0?";
    cre2_string_t	original = {
      .data   = pattern,
      .length = strlen(pattern)
    };
    cre2_string_t	quoted;
    int			result;
    result = cre2_quote_meta(&quoted, &original);
    if (0 != result)
      goto error;
    if (0 != strncmp("1\\.5\\-2\\.0\\?", quoted.data, quoted.length))
      goto error;
    /* The quoted buffer is released by this program. */
    free((void *)quoted.data);
  }

  /* ------------------------------------------------------------------ */

  { /* minimum and maximum matching strings */
    const char *	pattern = "(?i)ABCdef";
    cre2_regexp_t *	rex;
    cre2_string_t	min, max;
    int			result;
    rex = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex) /* never hand a null regexp to the library */
      goto error;
    {
      result = cre2_possible_match_range(rex, &min, &max, 1024);
      if (1 != result)
	goto error;
      if (0 != strncmp("ABCDEF", min.data, min.length))
	goto error;
      if (0 != strncmp("abcdef", max.data, max.length))
	goto error;
    }
    cre2_delete(rex);
    free((void *)min.data);
    free((void *)max.data);
  }

  /* ------------------------------------------------------------------ */

  { /* successfully check rewrite string */
    const char *	pattern = "a(b)c";
    const char *	subst   = "def";
    cre2_string_t	rewrite = {
      .data	= subst,
      .length	= strlen(subst)
    };
    cre2_regexp_t *	rex;
    cre2_string_t	errmsg;
    int			result;
    rex = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    {
      result = cre2_check_rewrite_string(rex, &rewrite, &errmsg);
      if (1 != result)
	goto error;
    }
    cre2_delete(rex);
  }
  { /* failed check rewrite string */
    const char *	pattern = "a(b)c";
    const char *	subst   = "\\1 \\2";
    cre2_string_t	rewrite = {
      .data	= subst,
      .length	= strlen(subst)
    };
    cre2_regexp_t *	rex;
    cre2_string_t	errmsg;
    int			result;
    rex = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    {
      result = cre2_check_rewrite_string(rex, &rewrite, &errmsg);
      if (0 != result)
	goto error;
      PRINTF("error message: ");
      FWRITE(errmsg.data, errmsg.length, 1);
      PRINTF("\n");
    }
    cre2_delete(rex);
    /* The error message is only released on this failure path. */
    free((void *)errmsg.data);
  }

/* ------------------------------------------------------------------ */

  exit(EXIT_SUCCESS);
 error:
  exit(EXIT_FAILURE);
}
Ejemplo n.º 4
0
/* Test program for cre2_partial_match() (pattern given as a string) and
   cre2_partial_match_re() (pattern given as a precompiled regexp).

   The same scenarios are run through both entry points: matches with
   zero, one or two requested match entries, non-matching text, and a
   request for more match entries than the pattern has groups.  The
   string variant is also run on a syntactically wrong pattern.  After
   most calls the input string is compared with the original text.

   Any unexpected result, including a regexp object that could not be
   allocated, jumps to the `error' label and the process exits with
   EXIT_FAILURE; otherwise it exits with EXIT_SUCCESS. */
int
main (void)
{
  { /* success, no parentheses */
    const char *	pattern = "ci.*ut";
    const char *	text	= "pre ciao salut post";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			result;
    result = cre2_partial_match(pattern, &input, NULL, 0);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }
  { /* success, one parenthetical subexpression, one match entry */
    const char *	pattern = "(ciao) salut";
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    result = cre2_partial_match(pattern, &input, match, nmatch);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
    if (0 != strncmp("ciao", match[0].data, match[0].length))
      goto error;
    PRINTF("match 0: ");
    FWRITE(match[0].data, match[0].length, 1);
    PRINTF("\n");
  }
  { /* success, two parenthetical subexpressions, two match entries */
    const char *	pattern = "(ciao) (salut)";
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 2;
    cre2_string_t	match[nmatch];
    int			result;
    result = cre2_partial_match(pattern, &input, match, nmatch);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
    if (0 != strncmp("ciao", match[0].data, match[0].length))
      goto error;
    if (0 != strncmp("salut", match[1].data, match[1].length))
      goto error;
    PRINTF("match 0: ");
    FWRITE(match[0].data, match[0].length, 1);
    PRINTF("\n");
    PRINTF("match 1: ");
    FWRITE(match[1].data, match[1].length, 1);
    PRINTF("\n");
  }
  { /* failure, no parentheses */
    const char *	pattern = "ci.*ut";
    const char *	text	= "ciao hello";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			result;
    result = cre2_partial_match(pattern, &input, NULL, 0);
    if (result)
      goto error;
  }
  { /* failure, one parenthetical subexpression */
    const char *	pattern = "(ciao) salut";
    const char *	text	= "ciao hello";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    result = cre2_partial_match(pattern, &input, match, nmatch);
    if (result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }
  { /* success, one parenthetical subexpression, no match entries */
    const char *	pattern = "(ciao) salut";
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			result;
    result = cre2_partial_match(pattern, &input, NULL, 0);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }
  { /* failure, one parenthetical subexpression, two match entries */
    const char *	pattern = "(ciao) salut";
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 2;
    cre2_string_t	match[nmatch];
    int			result;
    /* Asking for more entries than the pattern has groups must fail. */
    memset(match, '\0', nmatch * sizeof(cre2_string_t));
    result = cre2_partial_match(pattern, &input, match, nmatch);
    if (0 != result)
      goto error;
  }
  { /* success, two parenthetical subexpressions, one match entry */
    const char *	pattern = "(ciao) (salut)";
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    result = cre2_partial_match(pattern, &input, match, nmatch);
    if (! result)
      goto error;
    if (0 != strncmp("ciao", match[0].data, match[0].length))
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
    PRINTF("match 0: ");
    FWRITE(match[0].data, match[0].length, 1);
    PRINTF("\n");
  }
  { /* wrong regexp specification */
    const char *	pattern = "cia(o salut";
    const char *	text	= "ciao hello";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    result = cre2_partial_match(pattern, &input, match, nmatch);
    if (0 != result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }


/* ------------------------------------------------------------------ */
/* Same scenarios with a precompiled regexp.  The regexp is checked for
   allocation failure before use and deleted right after the call. */

  { /* success, no parentheses */
    const char *	pattern = "ci.*ut";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, NULL, 0);
    cre2_delete(rex);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }
  { /* success, one parenthetical subexpression, one match entry */
    const char *	pattern = "(ciao) salut";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, match, nmatch);
    cre2_delete(rex);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
    if (0 != strncmp("ciao", match[0].data, match[0].length))
      goto error;
    PRINTF("match 0: ");
    FWRITE(match[0].data, match[0].length, 1);
    PRINTF("\n");
  }
  { /* success, two parenthetical subexpressions, two match entries */
    const char *	pattern = "(ciao) (salut)";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 2;
    cre2_string_t	match[nmatch];
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, match, nmatch);
    cre2_delete(rex);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
    if (0 != strncmp("ciao", match[0].data, match[0].length))
      goto error;
    if (0 != strncmp("salut", match[1].data, match[1].length))
      goto error;
    PRINTF("match 0: ");
    FWRITE(match[0].data, match[0].length, 1);
    PRINTF("\n");
    PRINTF("match 1: ");
    FWRITE(match[1].data, match[1].length, 1);
    PRINTF("\n");
  }
  { /* failure, no parentheses */
    const char *	pattern = "ci.*ut";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao hello";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, NULL, 0);
    cre2_delete(rex);
    if (result)
      goto error;
  }
  { /* failure, one parenthetical subexpression */
    const char *	pattern = "(ciao) salut";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao hello";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, match, nmatch);
    cre2_delete(rex);
    if (result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }
  { /* success, one parenthetical subexpression, no match entries */
    const char *	pattern = "(ciao) salut";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, NULL, 0);
    cre2_delete(rex);
    if (! result)
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
  }
  { /* failure, one parenthetical subexpression, two match entries */
    const char *	pattern = "(ciao) salut";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 2;
    cre2_string_t	match[nmatch];
    int			result;
    memset(match, '\0', nmatch * sizeof(cre2_string_t));
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, match, nmatch);
    cre2_delete(rex);
    if (0 != result)
      goto error;
  }
  { /* success, two parenthetical subexpressions, one match entry */
    const char *	pattern = "(ciao) (salut)";
    cre2_regexp_t *	rex;
    const char *	text	= "ciao salut";
    cre2_string_t	input   = { .data = text, .length = strlen(text) };
    int			nmatch  = 1;
    cre2_string_t	match[nmatch];
    int			result;
    rex    = cre2_new(pattern, strlen(pattern), NULL);
    if (!rex)
      goto error;
    result = cre2_partial_match_re(rex, &input, match, nmatch);
    cre2_delete(rex);
    if (! result)
      goto error;
    if (0 != strncmp("ciao", match[0].data, match[0].length))
      goto error;
    if (0 != strncmp(text, input.data, input.length))
      goto error;
    PRINTF("match 0: ");
    FWRITE(match[0].data, match[0].length, 1);
    PRINTF("\n");
  }

  exit(EXIT_SUCCESS);
 error:
  exit(EXIT_FAILURE);
}
Ejemplo n.º 5
0
int
main (void)
{
  cre2_regexp_t *	rex;
  cre2_options_t *	opt;
  const char *		pattern;

/* ------------------------------------------------------------------ */
/* single match */

  pattern = "ciao";
  opt     = cre2_opt_new();
  cre2_opt_set_posix_syntax(opt, 1);
  rex = cre2_new(pattern, strlen(pattern), opt);
  {
    if (cre2_error_code(rex))
      goto error;
    cre2_string_t	match;
    int			nmatch = 1;
    int			e;
    const char *	text = "ciao";
    int			text_len = strlen(text);

    e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, &match, nmatch);
    if (1 != e)
      goto error;
    PRINTF("match: retval=%d, ", e);
    FWRITE(match.data, match.length, 1);
    PRINTF("\n");
  }
  cre2_delete(rex);
  cre2_opt_delete(opt);

/* ------------------------------------------------------------------ */
/* two groups */

  pattern = "(ciao) (hello)";
  opt = cre2_opt_new();
  rex = cre2_new(pattern, strlen(pattern), opt);
  {
    if (cre2_error_code(rex))
      goto error;
    int			nmatch = 3;
    cre2_string_t	strings[nmatch];
    cre2_range_t	ranges[nmatch];
    int			e;
    const char *	text = "ciao hello";
    int			text_len = strlen(text);

    e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch);
    if (1 != e)
      goto error;
    cre2_strings_to_ranges(text, ranges, strings, nmatch);
    PRINTF("full match: ");
    FWRITE(text+ranges[0].start, ranges[0].past-ranges[0].start, 1);
    PRINTF("\n");
    PRINTF("first group: ");
    FWRITE(text+ranges[1].start, ranges[1].past-ranges[1].start, 1);
    PRINTF("\n");
    PRINTF("second group: ");
    FWRITE(text+ranges[2].start, ranges[2].past-ranges[2].start, 1);
    PRINTF("\n");
  }
  cre2_delete(rex);
  cre2_opt_delete(opt);

/* ------------------------------------------------------------------ */
/* test literal option */

  pattern = "(ciao) (hello)";
  opt = cre2_opt_new();
  cre2_opt_set_literal(opt, 1);
  rex = cre2_new(pattern, strlen(pattern), opt);
  {
    if (cre2_error_code(rex))
      goto error;
    int			nmatch = 0;
    int			e;
    const char *	text = "(ciao) (hello)";
    int			text_len = strlen(text);
    e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, NULL, nmatch);
    if (0 == e)
      goto error;
  }
  cre2_delete(rex);
  cre2_opt_delete(opt);

/* ------------------------------------------------------------------ */
/* test named groups */

  pattern = "from (?P<S>.*) to (?P<D>.*)";
  opt = cre2_opt_new();
  rex = cre2_new(pattern, strlen(pattern), opt);
  {
    if (cre2_error_code(rex))
      goto error;
    int			nmatch = cre2_num_capturing_groups(rex) + 1;
    cre2_string_t	strings[nmatch];
    int			e, SIndex, DIndex;
    const char *	text = "from Montreal, Canada to Lausanne, Switzerland";
    int			text_len = strlen(text);
    e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch);
    if (0 == e)
      goto error;
    SIndex = cre2_find_named_capturing_groups(rex, "S");
    if (0 != strncmp("Montreal, Canada",      strings[SIndex].data, strings[SIndex].length))
      goto error;
    DIndex = cre2_find_named_capturing_groups(rex, "D");
    if (0 != strncmp("Lausanne, Switzerland", strings[DIndex].data, strings[DIndex].length))
      goto error;
  }
  cre2_delete(rex);
  cre2_opt_delete(opt);

/* ------------------------------------------------------------------ */



  exit(EXIT_SUCCESS);
 error:
  exit(EXIT_FAILURE);
}
Ejemplo n.º 6
0
  /* repg: replace every match of the regular expression `lub` in the text
  ** `rad` with `rep`.
  **
  **   All three arguments are turned into C strings with u2_cr_tape().
  **   The pattern is screened first and rejected (u2_nul) when it uses
  **   \P or \p, or opens a group with "(?" that is not "(?:".  Text
  **   between \Q and \E is skipped by the screening loop.
  **
  **   The text is then walked from left to right, trying an anchored match
  **   at each offset.  Unmatched bytes are copied to the result, and each
  **   non-empty match is replaced by `rep`.  The result is accumulated in
  **   reverse and flopped at the end.
  **
  **   Produces u2_nul when the pattern is rejected or fails to compile,
  **   otherwise [u2_nul result].  Bails with c3__exit when the options or
  **   regexp object cannot be allocated.  The three C strings are released
  **   on every exit path.
  */
  u2_noun                                                         //  produce
  j2_mbc(Pt5, repg)(u2_wire wir_r,
                    u2_noun lub,
                    u2_noun rad,
                    u2_noun rep)                                  //  retain
  {
    c3_y* lub_y = u2_cr_tape(lub);
    c3_y* rad_y = u2_cr_tape(rad);
    c3_y* rep_y = u2_cr_tape(rep);

    char* rec = (char*)lub_y;
    char* end;
    while(*rec != 0) {
      if(*rec == '\\') {
        rec++;
        switch (*rec) {
        case 'P':
        case 'p':
          free(lub_y);
          free(rad_y);
          free(rep_y);
          return u2_nul;
        case 'Q':
          // Jump to the closing \E, or to the last character if there is none.
          end = strstr(rec, "\\E");
          if(end == NULL) rec += strlen(rec) - 1;
          else rec = end;
          break;
        default:
          break;
        }
        // A trailing backslash leaves rec on the terminator; stepping once
        // more would walk past the end of the buffer.
        if(*rec != 0) rec++;
      }
      else if(*rec == '(') {
        rec++;
        if(*rec == '?') {
          rec++;
          // Only the "(?:" form is accepted after "(?".
          if(*rec != ':') {
            free(lub_y);
            free(rad_y);
            free(rep_y);
            return u2_nul;
          }
          rec++;
        }
      }
      else
        rec++;
    }

    cre2_regexp_t * rex;
    cre2_options_t * opt;

    opt = cre2_opt_new();
    if (opt) {
      cre2_opt_set_log_errors(opt, 0);
      cre2_opt_set_encoding(opt, CRE2_Latin1);
      cre2_opt_set_perl_classes(opt, 1);
      cre2_opt_set_one_line(opt, 1);
      cre2_opt_set_longest_match(opt, 1);
      rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt);
      if (rex) {
        if (!cre2_error_code(rex)) {
          int text_len = strlen((char *)rad_y);
          cre2_string_t matches[1];
          int ic = 0;                       //  current offset into rad_y

          u2_noun ret = u2_nul;             //  result, built in reverse
          while (ic <= text_len) {
            int match = cre2_match(rex, (const char*)rad_y, text_len, ic, text_len, CRE2_ANCHOR_START, matches, 1);

            if (!match) {
              // No match here: keep the byte (the terminator is skipped).
              if(rad_y[ic])
                ret = u2_cn_cell((u2_atom)rad_y[ic], ret);
              ic++;
            }
            else {
              int mlen = matches[0].length;
              if (mlen == 0) {
                // Empty match: emit the rest of the text and the replacement,
                // then stop.
                // NOTE(review): this overwrites `ret`, so anything gathered
                // before this offset is dropped -- confirm intended.
                ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *) rad_y+ic)), u2_ckb_flop(u2_ci_tape((char *)rep_y)));
                ic = text_len + 1;
              }
              else {
                ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *)rep_y)), ret);
                ic += mlen;
              }
            }
          }
          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          free(rep_y);
          return u2_cn_cell(u2_nul, u2_ckb_flop(ret));
        }
        else {
          // Compiling the regular expression failed
          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          free(rep_y);
          return u2_nul;
        }
      }
      // rex Allocation Error: release opt once, then share the bail below.
      cre2_opt_delete(opt);
    }
    // opt or rex Allocation Error
    free(lub_y);
    free(rad_y);
    free(rep_y);
    u2_bl_bail(wir_r, c3__exit);
    return u2_nul;
  }