Пример #1
0
CAMLprim value pcre_firsttable_stub(value v_rex)
{
  const unsigned char *ftable;

  int ret =
    pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *) &ftable);

  if (ret != 0) raise_internal_error("pcre_firsttable_stub");

  if (ftable == NULL) return None;
  else {
    value v_res, v_res_str;
    char *ptr;
    int i;

    Begin_roots1(v_rex);
      v_res_str = caml_alloc_string(32);
    End_roots();

    ptr = String_val(v_res_str);
    for (i = 0; i <= 31; ++i) { *ptr = *ftable; ++ptr; ++ftable; }

    Begin_roots1(v_res_str);
      /* Allocates [Some string] from firsttable */
      v_res = caml_alloc_small(1, 0);
    End_roots();

    Field(v_res, 0) = v_res_str;

    return v_res;
  }
}
Пример #2
0
CAMLprim value pcre_firstbyte_stub(value v_rex)
{
  int firstbyte;
  const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &firstbyte);

  if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError,
                                       "pcre_firstbyte_stub");

  switch (firstbyte) {
    case -1 : return var_Start_only; break;  /* [`Start_only] */
    case -2 : return var_ANCHORED; break;    /* [`ANCHORED] */
    default :
      if (firstbyte < 0 )  /* Should not happen */
        caml_raise_with_string(*pcre_exc_InternalError, "pcre_firstbyte_stub");
      else {
        value v_firstbyte;
        /* Allocates the non-constant constructor [`Char of char] and fills
           in the appropriate value */
        v_firstbyte = caml_alloc_small(2, 0);
        Field(v_firstbyte, 0) = var_Char;
        Field(v_firstbyte, 1) = Val_int(firstbyte);
        return v_firstbyte;
      }
  }
}
Пример #3
0
CAMLprim value pcre_lastliteral_stub(value v_rex)
{
  int lastliteral;
  const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL,
                                        &lastliteral);

  if (ret != 0) raise_internal_error("pcre_lastliteral_stub");

  if (lastliteral == -1) return None;
  if (lastliteral < 0) raise_internal_error("pcre_lastliteral_stub");
  else {
    /* Allocates [Some char] */
    value v_res = caml_alloc_small(1, 0);
    Field(v_res, 0) = Val_int(lastliteral);
    return v_res;
  }
}
Пример #4
0
CAMLprim value pcre_exec_stub0(
    intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
    value v_ovec, value v_maybe_cof, value v_workspace)
{
  int ret;
  int is_dfa = v_workspace != (value) NULL;
  long
    pos = v_pos,
    len = caml_string_length(v_subj),
    subj_start = v_subj_start;
  long ovec_len = Wosize_val(v_ovec);

  if (pos > len || pos < subj_start)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal position");

  if (subj_start > len || subj_start < 0)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start");

  pos -= subj_start;
  len -= subj_start;

  {
    const pcre *code = get_rex(v_rex);  /* Compiled pattern */
    const pcre_extra *extra = get_extra(v_rex);  /* Extra info */
    const char *ocaml_subj =
      String_val(v_subj) + subj_start;  /* Subject string */
    const int opt = v_opt;  /* Runtime options */

    /* Special case when no callout functions specified */
    if (v_maybe_cof == None) {
      int *ovec = (int *) &Field(v_ovec, 0);

      /* Performs the match */
      if (is_dfa)
        ret =
          pcre_dfa_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len,
              (int *) &Field(v_workspace, 0), Wosize_val(v_workspace));
      else
        ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len);

      if (ret < 0) handle_exec_error("pcre_exec_stub", ret);
      else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
    }

    /* There are callout functions */
    else {
      value v_cof = Field(v_maybe_cof, 0);
      value v_substrings;
      char *subj = caml_stat_alloc(sizeof(char) * len);
      int *ovec = caml_stat_alloc(sizeof(int) * ovec_len);
      int workspace_len;
      int *workspace;
      struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
      struct pcre_extra new_extra =
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
# ifdef PCRE_EXTRA_MARK
#  ifdef PCRE_EXTRA_EXECUTABLE_JIT
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL };
#  else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL };
#  endif
# else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
# endif
#else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
#endif

      cod.subj_start = subj_start;
      memcpy(subj, ocaml_subj, len);

      Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
        Begin_roots1(v_subj);
          v_substrings = caml_alloc_small(2, 0);
        End_roots();

        Field(v_substrings, 0) = v_subj;
        Field(v_substrings, 1) = v_ovec;

        cod.v_substrings_p = &v_substrings;
        cod.v_cof_p = &v_cof;
        new_extra.callout_data = &cod;

        if (extra != NULL) {
          new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
          new_extra.study_data = extra->study_data;
          new_extra.match_limit = extra->match_limit;
          new_extra.tables = extra->tables;
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
          new_extra.match_limit_recursion = extra->match_limit_recursion;
#endif
        }

        if (is_dfa) {
          workspace_len = Wosize_val(v_workspace);
          workspace = caml_stat_alloc(sizeof(int) * workspace_len);
          ret =
            pcre_dfa_exec(code, extra, subj, len, pos, opt, ovec, ovec_len,
                (int *) &Field(v_workspace, 0), workspace_len);
        } else
          ret =
            pcre_exec(code, &new_extra, subj, len, pos, opt, ovec, ovec_len);

        caml_stat_free(subj);
      End_roots();

      if (ret < 0) {
        if (is_dfa) caml_stat_free(workspace);
        caml_stat_free(ovec);
        if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn);
        else handle_exec_error("pcre_exec_stub(callout)", ret);
      } else {
        handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
        if (is_dfa) {
          caml_int_ptr ocaml_workspace_dst =
            (caml_int_ptr) &Field(v_workspace, 0);
          const int *workspace_src = workspace;
          const int *workspace_src_stop = workspace + workspace_len;
          while (workspace_src != workspace_src_stop) {
            *ocaml_workspace_dst = *workspace_src;
            ocaml_workspace_dst++;
            workspace_src++;
          }
          caml_stat_free(workspace);
        }
        caml_stat_free(ovec);
      }
    }
  }

  return Val_unit;
}

CAMLprim value pcre_exec_stub(
    intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
    value v_ovec, value v_maybe_cof)
{
  return pcre_exec_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj,
                         v_ovec, v_maybe_cof, (value) NULL);
}

/* Byte-code hook for pcre_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
{
  return
    pcre_exec_stub0(
        Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
        argv[4], argv[5], argv[6], (value) NULL);
}

/* Byte-code hook for pcre_dfa_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_dfa_exec_stub_bc(value *argv, int __unused argn)
{
  return
    pcre_exec_stub0(
        Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
        argv[4], argv[5], argv[6], argv[7]);
}

static struct custom_operations tables_ops = {
  "pcre_ocaml_tables",
  pcre_dealloc_tables,
  custom_compare_default,
  custom_hash_default,
  custom_serialize_default,
  custom_deserialize_default,
  custom_compare_ext_default
};

/* Generates a new set of chartables for the current locale (see man
   page of PCRE */
CAMLprim value pcre_maketables_stub(value __unused v_unit)
{
  /* GC will do a full cycle every 1_000_000 table set allocations (one
     table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed
     table sets) */
  const value v_tables =
    caml_alloc_custom(
        &tables_ops, sizeof(struct pcre_ocaml_tables), 1, 1000000);
  set_tables(v_tables, pcre_maketables());
  return v_tables;
}

/* Wraps around the isspace-function */
CAMLprim value pcre_isspace_stub(value v_c)
{
  return Val_bool(isspace(Int_val(v_c)));
}


/* Returns number of substring associated with a name */

CAMLprim intnat pcre_get_stringnumber_stub(value v_rex, value v_name)
{
  const int ret = pcre_get_stringnumber(get_rex(v_rex), String_val(v_name));
  if (ret == PCRE_ERROR_NOSUBSTRING)
    caml_invalid_argument("Named string not found");

  return ret;
}

CAMLprim value pcre_get_stringnumber_stub_bc(value v_rex, value v_name)
{
  return Val_int(pcre_get_stringnumber_stub(v_rex, v_name));
}


/* Returns array of names of named substrings in a regexp */
CAMLprim value pcre_names_stub(value v_rex)
{
  CAMLparam0();
  CAMLlocal1(v_res);
  int name_count;
  int entry_size;
  const char *tbl_ptr;
  int i;

  int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
  if (ret != 0) raise_internal_error("pcre_names_stub: namecount");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
  if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
  if (ret != 0) raise_internal_error("pcre_names_stub: nametable");

  v_res = caml_alloc(name_count, 0);

  for (i = 0; i < name_count; ++i) {
    value v_name = caml_copy_string(tbl_ptr + 2);
    Store_field(v_res, i, v_name);
    tbl_ptr += entry_size;
  }

  CAMLreturn(v_res);
}

/* Generic stub for getting integer results from pcre_config */
static inline int pcre_config_int(int what)
{
  int ret;
  pcre_config(what, (void *) &ret);
  return ret;
}

/* Generic stub for getting long integer results from pcre_config */
static inline int pcre_config_long(int what)
{
  long ret;
  pcre_config(what, (void *) &ret);
  return ret;
}
Пример #5
0
/* Executes a pattern match with runtime options, a regular expression, a
   string offset, a string length, a subject string, a number of subgroup
   offsets, an offset vector and an optional callout function */
CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_ofs,
                              value v_subj, value v_subgroups2, value v_ovec,
                              value v_maybe_cof)
{
  const int ofs = Int_val(v_ofs), len = caml_string_length(v_subj);

  if (ofs > len || ofs < 0)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal offset");

  {
    const pcre *code = (pcre *) Field(v_rex, 1);  /* Compiled pattern */
    const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);  /* Extra info */
    const char *ocaml_subj = String_val(v_subj);  /* Subject string */
    const int opt = Int_val(v_opt);  /* Runtime options */
    int subgroups2 = Int_val(v_subgroups2);
    const int subgroups2_1 = subgroups2 - 1;
    const int subgroups3 = (subgroups2 >> 1) + subgroups2;

    /* Special case when no callout functions specified */
    if (v_maybe_cof == None) {
      int *ovec = (int *) &Field(v_ovec, 0);

      /* Performs the match */
      const int ret =
        pcre_exec(code, extra, ocaml_subj, len, ofs, opt, ovec, subgroups3);

      if (ret < 0) {
        switch(ret) {
          case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found);
          case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial);
          case PCRE_ERROR_MATCHLIMIT :
            caml_raise_constant(*pcre_exc_MatchLimit);
          case PCRE_ERROR_BADPARTIAL :
            caml_raise_constant(*pcre_exc_BadPartial);
          case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8);
          case PCRE_ERROR_BADUTF8_OFFSET :
            caml_raise_constant(*pcre_exc_BadUTF8Offset);
          default :
            caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub");
        }
      }

      else {
        const int *ovec_src = ovec + subgroups2_1;
        long int *ovec_dst = (long int *) ovec + subgroups2_1;

        /* Converts offsets from C-integers to OCaml-Integers
           This is a bit tricky, because there are 32- and 64-bit platforms
           around and OCaml chooses the larger possibility for representing
           integers when available (also in arrays) - not so the PCRE */
        while (subgroups2--) {
          *ovec_dst = Val_int(*ovec_src);
          --ovec_src; --ovec_dst;
        }
      }
    }

    /* There are callout functions */
    else {
      value v_cof = Field(v_maybe_cof, 0);
      value v_substrings;
      char *subj = caml_stat_alloc(sizeof(char) * len);
      int *ovec = caml_stat_alloc(sizeof(int) * subgroups3);
      int ret;
      struct cod cod = { (value *) NULL, (value *) NULL, (value) NULL };
      struct pcre_extra new_extra =
#ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
#else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
#endif

      memcpy(subj, ocaml_subj, len);

      Begin_roots3(v_rex, v_cof, v_substrings);
        Begin_roots2(v_subj, v_ovec);
          v_substrings = caml_alloc_small(2, 0);
        End_roots();

        Field(v_substrings, 0) = v_subj;
        Field(v_substrings, 1) = v_ovec;

        cod.v_substrings_p = &v_substrings;
        cod.v_cof_p = &v_cof;
        new_extra.callout_data = &cod;

        if (extra == NULL) {
          ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec,
                          subgroups3);
        }
        else {
          new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
          new_extra.study_data = extra->study_data;
          new_extra.match_limit = extra->match_limit;
          new_extra.tables = extra->tables;
#ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION
          new_extra.match_limit_recursion = extra->match_limit_recursion;
#endif

          ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec,
                          subgroups3);
        }

        free(subj);
      End_roots();

      if (ret < 0) {
        free(ovec);
        switch(ret) {
          case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found);
          case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial);
          case PCRE_ERROR_MATCHLIMIT :
            caml_raise_constant(*pcre_exc_MatchLimit);
          case PCRE_ERROR_BADPARTIAL :
            caml_raise_constant(*pcre_exc_BadPartial);
          case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8);
          case PCRE_ERROR_BADUTF8_OFFSET :
            caml_raise_constant(*pcre_exc_BadUTF8Offset);
          case PCRE_ERROR_CALLOUT : caml_raise(cod.v_exn);
          default :
            caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub");
        }
      }

      else {
        int *ovec_src = ovec + subgroups2_1;
        long int *ovec_dst = &Field(v_ovec, 0) + subgroups2_1;

        while (subgroups2--) {
          *ovec_dst = Val_int(*ovec_src);
          --ovec_src; --ovec_dst;
        }

        free(ovec);
      }
    }
  }

  return Val_unit;
}

/* Byte-code hook for pcre_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
{
  return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3],
                        argv[4], argv[5], argv[6]);
}

/* Generates a new set of chartables for the current locale (see man
   page of PCRE */
CAMLprim value pcre_maketables_stub(value __unused v_unit)
{
  /* GC will do a full cycle every 100 table set allocations
     (one table set consumes 864 bytes -> maximum of 86400 bytes
     unreclaimed table sets) */
  const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 864, 86400);
  Field(v_res, 1) = (value) pcre_maketables();
  return v_res;
}

/* Wraps around the isspace-function */
CAMLprim value pcre_isspace_stub(value v_c)
{
  return Val_bool(isspace(Int_val(v_c)));
}

/* Returns number of substring associated with a name */
CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name)
{
  const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1),
                                        String_val(v_name));
  if (ret == PCRE_ERROR_NOSUBSTRING)
    caml_invalid_argument("Named string not found");

  return Val_int(ret);
}

/* Returns array of names of named substrings in a regexp */
CAMLprim value pcre_names_stub(value v_rex)
{
  CAMLparam0();
  CAMLlocal1(v_res);
  int name_count;
  int entry_size;
  const char *tbl_ptr;
  int i;

  int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
  if (ret != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
  if (ret != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
  if (ret != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub");

  v_res = caml_alloc(name_count, 0);

  for (i = 0; i < name_count; ++i) {
    value v_name = caml_copy_string(tbl_ptr + 2);
    Store_field(v_res, i, v_name);
    tbl_ptr += entry_size;
  }

  CAMLreturn(v_res);
}