unsigned long mlre2__custom_regex_deserialize(void * dst) { int len = caml_deserialize_sint_4(); RE2::Options options; char * pattern = (char *) caml_stat_alloc(sizeof(*pattern) * (len)); caml_deserialize_block_1(pattern, len); pattern[len - 1] = '\0'; options.Copy(RE2::Quiet); options.set_max_mem(caml_deserialize_sint_8()); options_of_bitfield((uint16_t) caml_deserialize_uint_2(), options); #ifdef DEBUG std::cerr << "deserialized regex /" << pattern << "/" << std::endl; #endif *(RE2 **) dst = new RE2(pattern, options); caml_stat_free(pattern); return sizeof(RE2 *); }
/* returns (cre2__obj_t * int * (string * int) list) where * - cre2__obj_t is the ML-side name for a custom_block with a struct regex * * - int is the number of submatches, including the whole match * - (string * int) list is the Map.to_alist of the submatch (name, index) Map.t */ CAMLprim value mlre2__create_re(value v_options, value v_pattern) { value v_retval, v_compile_error; const char * c_pat = String_val(v_pattern); RE2::Options opt; RE2* compiled = NULL; opt.Copy(RE2::Quiet); while (v_options != Val_emptylist) { int val = Int_val(Field(Field(v_options, 0), 0)); switch (Tag_val(Field(v_options, 0))) { #define X(_u,FIRST,REST,_uu) case FIRST##REST : opt.set_##FIRST##REST(val); break; #define X__ENCODING(_u,FIRST,REST,_uu,SUFFIX,_uuu,TRANSLATED) \ case FIRST##REST##SUFFIX : opt.set_##FIRST##REST(val TRANSLATED); break; #define X__MAXMEM(_u,FIRST,REST,_uu) X(_u,FIRST,REST,_uu) #include "enum_x_macro.h" default : caml_invalid_argument("invalid option\n"); } v_options = Field(v_options, 1); } compiled = new RE2(c_pat, opt); if (!compiled->ok()) { /* Warning from this point on it's no longer safe to access v_options or v_pattern as the GC might be invoked from caml_copy_string and move those values (as we haven't registered the paramters they wouldn't get updated). This is fine because we don't access them before we call caml_raise_with_arg. */ v_compile_error = caml_copy_string(compiled->error().c_str()); delete compiled; compiled = NULL; caml_raise_with_arg(*caml_named_value("mlre2__Regex_compile_failed"), v_compile_error); } v_retval = caml_alloc_custom(&mlre2__custom_regex_ops, sizeof(compiled), 1024*1024, /* RE2 object uses ~1MB of memory outside the OCaml heap */ 500*1024*1024); /* I'm okay with 500MB of RAM being wasted */ Regex_val(v_retval) = compiled; return v_retval; }