/* Queries PCRE_INFO_FIRSTTABLE for the given regexp.
   Returns [None] when PCRE reports a NULL table, otherwise [Some string]
   holding a 32-byte copy of the table.  Raises an internal error if the
   fullinfo query fails. */
CAMLprim value pcre_firsttable_stub(value v_rex)
{
  const unsigned char *table;
  value v_bytes, v_some;
  char *dst;
  int idx;

  if (pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *) &table) != 0)
    raise_internal_error("pcre_firsttable_stub");

  /* No table available */
  if (table == NULL) return None;

  Begin_roots1(v_rex);
    v_bytes = caml_alloc_string(32);
  End_roots();

  /* Byte-wise copy of the table into the freshly allocated OCaml string */
  dst = String_val(v_bytes);
  for (idx = 0; idx < 32; ++idx) dst[idx] = table[idx];

  /* The string must survive the allocation of the [Some _] cell */
  Begin_roots1(v_bytes);
    v_some = caml_alloc_small(1, 0);
  End_roots();

  Field(v_some, 0) = v_bytes;

  return v_some;
}
/* Queries PCRE_INFO_FIRSTBYTE for the given regexp and maps the result to a
   polymorphic variant:
     -1            -> [`Start_only]
     -2            -> [`ANCHORED]
     non-negative  -> [`Char of char]
   Any other negative value, or a failing fullinfo query, raises
   InternalError. */
CAMLprim value pcre_firstbyte_stub(value v_rex)
{
  int byte;
  value v_variant;

  if (pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &byte) != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_firstbyte_stub");

  if (byte == -1) return var_Start_only;  /* [`Start_only] */
  if (byte == -2) return var_ANCHORED;    /* [`ANCHORED] */

  /* Should not happen */
  if (byte < 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_firstbyte_stub");

  /* Non-constant constructor [`Char of char]: a two-field block holding the
     variant hash and the character */
  v_variant = caml_alloc_small(2, 0);
  Field(v_variant, 0) = var_Char;
  Field(v_variant, 1) = Val_int(byte);

  return v_variant;
}
/* Queries PCRE_INFO_LASTLITERAL for the given regexp.
   Returns [None] when PCRE reports -1 and [Some char] for a non-negative
   result.  A failing fullinfo query or any other negative result raises an
   internal error. */
CAMLprim value pcre_lastliteral_stub(value v_rex)
{
  int literal;

  if (pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL, &literal) != 0)
    raise_internal_error("pcre_lastliteral_stub");

  if (literal == -1) return None;

  if (literal >= 0) {
    /* Allocates [Some char] */
    value v_some = caml_alloc_small(1, 0);
    Field(v_some, 0) = Val_int(literal);
    return v_some;
  }

  /* Remaining negative values are unexpected */
  raise_internal_error("pcre_lastliteral_stub");
}
CAMLprim value pcre_exec_stub0( intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj, value v_ovec, value v_maybe_cof, value v_workspace) { int ret; int is_dfa = v_workspace != (value) NULL; long pos = v_pos, len = caml_string_length(v_subj), subj_start = v_subj_start; long ovec_len = Wosize_val(v_ovec); if (pos > len || pos < subj_start) caml_invalid_argument("Pcre.pcre_exec_stub: illegal position"); if (subj_start > len || subj_start < 0) caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start"); pos -= subj_start; len -= subj_start; { const pcre *code = get_rex(v_rex); /* Compiled pattern */ const pcre_extra *extra = get_extra(v_rex); /* Extra info */ const char *ocaml_subj = String_val(v_subj) + subj_start; /* Subject string */ const int opt = v_opt; /* Runtime options */ /* Special case when no callout functions specified */ if (v_maybe_cof == None) { int *ovec = (int *) &Field(v_ovec, 0); /* Performs the match */ if (is_dfa) ret = pcre_dfa_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len, (int *) &Field(v_workspace, 0), Wosize_val(v_workspace)); else ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len); if (ret < 0) handle_exec_error("pcre_exec_stub", ret); else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret); } /* There are callout functions */ else { value v_cof = Field(v_maybe_cof, 0); value v_substrings; char *subj = caml_stat_alloc(sizeof(char) * len); int *ovec = caml_stat_alloc(sizeof(int) * ovec_len); int workspace_len; int *workspace; struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL }; struct pcre_extra new_extra = #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION # ifdef PCRE_EXTRA_MARK # ifdef PCRE_EXTRA_EXECUTABLE_JIT { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL }; # else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL }; # endif # else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 }; # endif #else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, 
NULL, NULL }; #endif cod.subj_start = subj_start; memcpy(subj, ocaml_subj, len); Begin_roots4(v_rex, v_cof, v_substrings, v_ovec); Begin_roots1(v_subj); v_substrings = caml_alloc_small(2, 0); End_roots(); Field(v_substrings, 0) = v_subj; Field(v_substrings, 1) = v_ovec; cod.v_substrings_p = &v_substrings; cod.v_cof_p = &v_cof; new_extra.callout_data = &cod; if (extra != NULL) { new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags; new_extra.study_data = extra->study_data; new_extra.match_limit = extra->match_limit; new_extra.tables = extra->tables; #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION new_extra.match_limit_recursion = extra->match_limit_recursion; #endif } if (is_dfa) { workspace_len = Wosize_val(v_workspace); workspace = caml_stat_alloc(sizeof(int) * workspace_len); ret = pcre_dfa_exec(code, extra, subj, len, pos, opt, ovec, ovec_len, (int *) &Field(v_workspace, 0), workspace_len); } else ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec, ovec_len); caml_stat_free(subj); End_roots(); if (ret < 0) { if (is_dfa) caml_stat_free(workspace); caml_stat_free(ovec); if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn); else handle_exec_error("pcre_exec_stub(callout)", ret); } else { handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret); if (is_dfa) { caml_int_ptr ocaml_workspace_dst = (caml_int_ptr) &Field(v_workspace, 0); const int *workspace_src = workspace; const int *workspace_src_stop = workspace + workspace_len; while (workspace_src != workspace_src_stop) { *ocaml_workspace_dst = *workspace_src; ocaml_workspace_dst++; workspace_src++; } caml_stat_free(workspace); } caml_stat_free(ovec); } } } return Val_unit; } CAMLprim value pcre_exec_stub( intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj, value v_ovec, value v_maybe_cof) { return pcre_exec_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj, v_ovec, v_maybe_cof, (value) NULL); } /* Byte-code hook for pcre_exec_stub Needed, because there are more than 5 arguments */ 
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn) { return pcre_exec_stub0( Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]), argv[4], argv[5], argv[6], (value) NULL); } /* Byte-code hook for pcre_dfa_exec_stub Needed, because there are more than 5 arguments */ CAMLprim value pcre_dfa_exec_stub_bc(value *argv, int __unused argn) { return pcre_exec_stub0( Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]), argv[4], argv[5], argv[6], argv[7]); } static struct custom_operations tables_ops = { "pcre_ocaml_tables", pcre_dealloc_tables, custom_compare_default, custom_hash_default, custom_serialize_default, custom_deserialize_default, custom_compare_ext_default }; /* Generates a new set of chartables for the current locale (see man page of PCRE */ CAMLprim value pcre_maketables_stub(value __unused v_unit) { /* GC will do a full cycle every 1_000_000 table set allocations (one table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed table sets) */ const value v_tables = caml_alloc_custom( &tables_ops, sizeof(struct pcre_ocaml_tables), 1, 1000000); set_tables(v_tables, pcre_maketables()); return v_tables; } /* Wraps around the isspace-function */ CAMLprim value pcre_isspace_stub(value v_c) { return Val_bool(isspace(Int_val(v_c))); } /* Returns number of substring associated with a name */ CAMLprim intnat pcre_get_stringnumber_stub(value v_rex, value v_name) { const int ret = pcre_get_stringnumber(get_rex(v_rex), String_val(v_name)); if (ret == PCRE_ERROR_NOSUBSTRING) caml_invalid_argument("Named string not found"); return ret; } CAMLprim value pcre_get_stringnumber_stub_bc(value v_rex, value v_name) { return Val_int(pcre_get_stringnumber_stub(v_rex, v_name)); } /* Returns array of names of named substrings in a regexp */ CAMLprim value pcre_names_stub(value v_rex) { CAMLparam0(); CAMLlocal1(v_res); int name_count; int entry_size; const char *tbl_ptr; int i; int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, 
&name_count); if (ret != 0) raise_internal_error("pcre_names_stub: namecount"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size); if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr); if (ret != 0) raise_internal_error("pcre_names_stub: nametable"); v_res = caml_alloc(name_count, 0); for (i = 0; i < name_count; ++i) { value v_name = caml_copy_string(tbl_ptr + 2); Store_field(v_res, i, v_name); tbl_ptr += entry_size; } CAMLreturn(v_res); } /* Generic stub for getting integer results from pcre_config */ static inline int pcre_config_int(int what) { int ret; pcre_config(what, (void *) &ret); return ret; } /* Generic stub for getting long integer results from pcre_config */ static inline int pcre_config_long(int what) { long ret; pcre_config(what, (void *) &ret); return ret; }
/* Executes a pattern match with runtime options, a regular expression, a string offset, a string length, a subject string, a number of subgroup offsets, an offset vector and an optional callout function */ CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_ofs, value v_subj, value v_subgroups2, value v_ovec, value v_maybe_cof) { const int ofs = Int_val(v_ofs), len = caml_string_length(v_subj); if (ofs > len || ofs < 0) caml_invalid_argument("Pcre.pcre_exec_stub: illegal offset"); { const pcre *code = (pcre *) Field(v_rex, 1); /* Compiled pattern */ const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2); /* Extra info */ const char *ocaml_subj = String_val(v_subj); /* Subject string */ const int opt = Int_val(v_opt); /* Runtime options */ int subgroups2 = Int_val(v_subgroups2); const int subgroups2_1 = subgroups2 - 1; const int subgroups3 = (subgroups2 >> 1) + subgroups2; /* Special case when no callout functions specified */ if (v_maybe_cof == None) { int *ovec = (int *) &Field(v_ovec, 0); /* Performs the match */ const int ret = pcre_exec(code, extra, ocaml_subj, len, ofs, opt, ovec, subgroups3); if (ret < 0) { switch(ret) { case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found); case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial); case PCRE_ERROR_MATCHLIMIT : caml_raise_constant(*pcre_exc_MatchLimit); case PCRE_ERROR_BADPARTIAL : caml_raise_constant(*pcre_exc_BadPartial); case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8); case PCRE_ERROR_BADUTF8_OFFSET : caml_raise_constant(*pcre_exc_BadUTF8Offset); default : caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub"); } } else { const int *ovec_src = ovec + subgroups2_1; long int *ovec_dst = (long int *) ovec + subgroups2_1; /* Converts offsets from C-integers to OCaml-Integers This is a bit tricky, because there are 32- and 64-bit platforms around and OCaml chooses the larger possibility for representing integers when available (also in arrays) - not 
so the PCRE */ while (subgroups2--) { *ovec_dst = Val_int(*ovec_src); --ovec_src; --ovec_dst; } } } /* There are callout functions */ else { value v_cof = Field(v_maybe_cof, 0); value v_substrings; char *subj = caml_stat_alloc(sizeof(char) * len); int *ovec = caml_stat_alloc(sizeof(int) * subgroups3); int ret; struct cod cod = { (value *) NULL, (value *) NULL, (value) NULL }; struct pcre_extra new_extra = #ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 }; #else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL }; #endif memcpy(subj, ocaml_subj, len); Begin_roots3(v_rex, v_cof, v_substrings); Begin_roots2(v_subj, v_ovec); v_substrings = caml_alloc_small(2, 0); End_roots(); Field(v_substrings, 0) = v_subj; Field(v_substrings, 1) = v_ovec; cod.v_substrings_p = &v_substrings; cod.v_cof_p = &v_cof; new_extra.callout_data = &cod; if (extra == NULL) { ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec, subgroups3); } else { new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags; new_extra.study_data = extra->study_data; new_extra.match_limit = extra->match_limit; new_extra.tables = extra->tables; #ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION new_extra.match_limit_recursion = extra->match_limit_recursion; #endif ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec, subgroups3); } free(subj); End_roots(); if (ret < 0) { free(ovec); switch(ret) { case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found); case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial); case PCRE_ERROR_MATCHLIMIT : caml_raise_constant(*pcre_exc_MatchLimit); case PCRE_ERROR_BADPARTIAL : caml_raise_constant(*pcre_exc_BadPartial); case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8); case PCRE_ERROR_BADUTF8_OFFSET : caml_raise_constant(*pcre_exc_BadUTF8Offset); case PCRE_ERROR_CALLOUT : caml_raise(cod.v_exn); default : caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub"); } } else { int *ovec_src = ovec + 
subgroups2_1; long int *ovec_dst = &Field(v_ovec, 0) + subgroups2_1; while (subgroups2--) { *ovec_dst = Val_int(*ovec_src); --ovec_src; --ovec_dst; } free(ovec); } } } return Val_unit; } /* Byte-code hook for pcre_exec_stub Needed, because there are more than 5 arguments */ CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn) { return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); } /* Generates a new set of chartables for the current locale (see man page of PCRE */ CAMLprim value pcre_maketables_stub(value __unused v_unit) { /* GC will do a full cycle every 100 table set allocations (one table set consumes 864 bytes -> maximum of 86400 bytes unreclaimed table sets) */ const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 864, 86400); Field(v_res, 1) = (value) pcre_maketables(); return v_res; } /* Wraps around the isspace-function */ CAMLprim value pcre_isspace_stub(value v_c) { return Val_bool(isspace(Int_val(v_c))); } /* Returns number of substring associated with a name */ CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name) { const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1), String_val(v_name)); if (ret == PCRE_ERROR_NOSUBSTRING) caml_invalid_argument("Named string not found"); return Val_int(ret); } /* Returns array of names of named substrings in a regexp */ CAMLprim value pcre_names_stub(value v_rex) { CAMLparam0(); CAMLlocal1(v_res); int name_count; int entry_size; const char *tbl_ptr; int i; int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count); if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size); if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr); if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub"); v_res = caml_alloc(name_count, 0); for (i 
= 0; i < name_count; ++i) { value v_name = caml_copy_string(tbl_ptr + 2); Store_field(v_res, i, v_name); tbl_ptr += entry_size; } CAMLreturn(v_res); }