Пример #1
0
Файл: rexp.c Проект: dcow/urbit
  u3_noun
  u3qe_rexp(u3_noun lub, u3_noun rad)
  {
    c3_y* lub_y = u3r_tape(lub);
    c3_y* rad_y = u3r_tape(rad);

    u3k(lub);
    int lub_l = u3kb_lent(lub);
    if (lub_l != strlen((char *)lub_y)) {
      free(lub_y);
      free(rad_y);
      return u3_nul;
    }

    char* rec = (char*)lub_y;
    char* end;
    while(*rec != 0) {
      if(*rec > 127) {
        free(lub_y);
        free(rad_y);
        return u3_nul;
      }
      else if(*rec == '\\') {
        rec++;
        switch (*rec) {
        case 'P':
        case 'p':
          free(lub_y);
          free(rad_y);
          return u3_nul;
        case 'Q':
          end = strstr(rec, "\\E");
          if(end == NULL) rec += strlen(rec) - 1;
          else rec = end;
        }
      }
      else if(*rec == '(') {
        rec++;
        if(*rec == '?') {
          rec++;
          if(*rec != ':') {
            free(lub_y);
            free(rad_y);
            return u3_nul;
          }
          rec++;
        }
      }
      else
        rec++;
    }

    cre2_regexp_t * rex;
    cre2_options_t * opt;

    opt = cre2_opt_new();
    if (opt) {
      cre2_opt_set_log_errors(opt, 0);
      cre2_opt_set_encoding(opt, CRE2_UTF8);
      cre2_opt_set_perl_classes(opt, 1);
      cre2_opt_set_one_line(opt, 1);
      cre2_opt_set_longest_match(opt, 1);
      rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt);
      if (rex) {
        if (!cre2_error_code(rex)) {
          int text_len = strlen((char *)rad_y);
          int captures = cre2_num_capturing_groups(rex);
          cre2_string_t matches[captures+1];

          int match = cre2_match(rex, (const char*)rad_y, text_len, 0, text_len, CRE2_UNANCHORED, matches, captures+1);

          if (!match) {
            // No matches
            cre2_opt_delete(opt);
            cre2_delete(rex);
            free(lub_y);
            free(rad_y);
            return u3i_cell(u3_nul, u3_nul);
          }

          u3_noun map = u3_nul;

          int i;
          for (i = 0; i < captures+1; i++) {
            char * buf = malloc(matches[i].length + 1);
            memcpy(buf, matches[i].data, matches[i].length);
            buf[matches[i].length] = 0;
            map = u3kdb_put(map, i, u3i_tape(buf));
            free(buf);
          }

          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          return u3i_cell(u3_nul, u3i_cell(u3_nul, map));

        }
        else {
          // Compiling the regular expression failed
          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          return u3_nul;
        }
        cre2_delete(rex);
      }
      cre2_opt_delete(opt);
    }
    free(lub_y);
    free(rad_y);
    u3m_bail(c3__exit);
    return u3_nul;
  }
Пример #2
0
  u2_noun                                                         //  produce
  j2_mbc(Pt5, repg)(u2_wire wir_r,
                    u2_noun lub,
                    u2_noun rad,
                    u2_noun rep)                                  //  retain
  {
    c3_y* lub_y = u2_cr_tape(lub);
    c3_y* rad_y = u2_cr_tape(rad);
    c3_y* rep_y = u2_cr_tape(rep);



    char* rec = (char*)lub_y;
    char* end;
    while(*rec != 0) {
      if(*rec == '\\') {
        rec++;
        switch (*rec) {
        case 'P':
        case 'p':
          free(lub_y);
          free(rad_y);
          return u2_nul;
        case 'Q':
          end = strstr(rec, "\\E");
          if(end == NULL) rec += strlen(rec) - 1;
          else rec = end;
        }
        rec++;
      }
      else if(*rec == '(') {
        rec++;
        if(*rec == '?') {
          rec++;
          if(*rec != ':') {
            free(lub_y);
            free(rad_y);
            return u2_nul;
          }
          rec++;
        }
      }
      else
        rec++;
    }

    cre2_regexp_t * rex;
    cre2_options_t * opt;

    opt = cre2_opt_new();
    if (opt) {
      cre2_opt_set_log_errors(opt, 0);
      cre2_opt_set_encoding(opt, CRE2_Latin1);
      cre2_opt_set_perl_classes(opt, 1);
      cre2_opt_set_one_line(opt, 1);
      cre2_opt_set_longest_match(opt, 1);
      rex = cre2_new((const char *)lub_y, strlen((char *)lub_y), opt);
      if (rex) {
        if (!cre2_error_code(rex)) {
          int text_len = strlen((char *)rad_y);
          cre2_string_t matches[1];
          int ic = 0;

          u2_noun ret = u2_nul;
          while (ic <= text_len) {
            int match = cre2_match(rex, (const char*)rad_y, text_len, ic, text_len, CRE2_ANCHOR_START, matches, 1);

            if (!match) {
              if(rad_y[ic])
                ret = u2_cn_cell((u2_atom)rad_y[ic], ret);
              ic++;
            }
            else {
              int mlen = matches[0].length;
              if (mlen == 0) {
                ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *) rad_y+ic)), u2_ckb_flop(u2_ci_tape((char *)rep_y)));
                ic = text_len + 1;
              }
              else {
                ret = u2_ckb_weld(u2_ckb_flop(u2_ci_tape((char *)rep_y)), ret);
                ic += mlen;
              }
            }
          }
          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          free(rep_y);
          return u2_cn_cell(u2_nul, u2_ckb_flop(ret));
        }
        else {
          // Compiling the regular expression failed
          cre2_opt_delete(opt);
          cre2_delete(rex);
          free(lub_y);
          free(rad_y);
          return u2_nul;
        }
        cre2_opt_delete(opt);
        cre2_delete(rex);
      }
      else {
        // rex Allocation Error
        cre2_opt_delete(opt);
        free(lub_y);
        free(rad_y);
        u2_bl_bail(wir_r, c3__exit);
      }
      cre2_opt_delete(opt);
    }
    // opt Allocation Error
    free(lub_y);
    free(rad_y);
    u2_bl_bail(wir_r, c3__exit);
    return u2_nul;
  }