RE2Regex(const std::string& rx) : Regex(rx), regexcl(rx, RE2::Quiet) { if (!regexcl.ok()) { throw RegexException(rx, regexcl.error()); } }
/**
 * Rewrites `item` from the capture groups of `pattern`.
 *
 * `item` has to match `pattern` in full (RE2::FullMatchN). When it does not,
 * `item` is returned unchanged. When it does, the captured groups are passed
 * to replace_matches() together with `target` and the '@' marker character,
 * and that function's result is returned.
 *
 * Only the first kMaxGroups (10) capture groups are extracted. The argument
 * arrays have exactly that many slots, so the group count handed to RE2 and
 * to replace_matches() is clamped to it; passing the raw group count would
 * make both read past the end of the arrays for patterns with more groups.
 *
 * @param item     text to match
 * @param pattern  compiled RE2 pattern
 * @param target   template forwarded to replace_matches()
 * @return the rewritten string, or `item` when there is no full match
 */
std::string replace_with_pattern(const std::string& item, const RE2& pattern, const std::string& target)
{
    const int kMaxGroups = 10;

    std::string arg[kMaxGroups];
    RE2::Arg argv[kMaxGroups];
    const RE2::Arg* args[kMaxGroups];
    for (int i = 0; i < kMaxGroups; i++) {
        argv[i] = &arg[i];
        args[i] = &argv[i];
    }

    // RE2 accepts fewer output arguments than capture groups, so clamping is
    // safe and keeps every access inside the fixed-size arrays.
    const int group_count = pattern.NumberOfCapturingGroups();
    const int used_groups = group_count > kMaxGroups ? kMaxGroups : group_count;

    if (!RE2::FullMatchN(item, pattern, args, used_groups)) {
        return item;
    }

    return replace_matches(target, arg, '@', used_groups);
}
int msc_regexec_ex(RE2 &re, const char *s, unsigned int slen, int startoffset, int *ovector, int ovecsize) { size_t startpos = startoffset; const size_t endpos = slen; // Total # of submatches in the regex pattern int num_submatch = 1 + re.NumberOfCapturingGroups(); // Index of the last non empty submatch int last_nonempty_submatch = num_submatch - 1; re2::StringPiece submatches[num_submatch]; // If the string does not match the pattern if (!re.Match(s, startpos, endpos, RE2::UNANCHORED, submatches, num_submatch)) { return -1; } // Find the last non empty submatch while (!submatches[last_nonempty_submatch].data()) { last_nonempty_submatch--; } int count = min(last_nonempty_submatch + 1, ovecsize / 3); // Extract submatch information as much as possible for (int i = 0; i < count; i++) { // An empty submatch if (!submatches[i].data()) { ovector[2 * i] = -1; ovector[2 * i + 1] = -1; } else { ovector[2 * i] = submatches[i].data() - s; ovector[2 * i + 1] = ovector[2 * i] + submatches[i].length(); } } // The output vector has enough space to store the information of // all non empty submatches + empty submatches among non empty submatches if (last_nonempty_submatch + 1 <= ovecsize / 3) { return last_nonempty_submatch + 1; } // Truncate empty submatches at the tail of 'ovector' if (!submatches[ovecsize / 3 - 1].data()) { for (int i = ovecsize / 3 - 2; i >= 0; i--) { if (submatches[i].data()) { return i + 1; } } } return 0; }
/**
 * JNI entry point: compiles a Java string into a heap-allocated RE2 object.
 *
 * @param env        JNI environment
 * @param cls        calling class (unused)
 * @param j_str      pattern text
 * @param j_options  Java-side options object, converted through Options
 * @return the RE2 pointer packed into a jlong on success. Returns 0 when the
 *         pattern characters could not be obtained, or when the pattern does
 *         not compile; in the latter case throw_RegExprException() is called
 *         with RE2's error text first.
 */
JNIEXPORT jlong JNICALL Java_com_logentries_re2_RE2_compileImpl
  (JNIEnv *env, jclass cls, jstring j_str, jobject j_options)
{
    Options options(env, j_options);

    const char *str = env->GetStringUTFChars(j_str, 0);
    if (str == NULL) {
        // GetStringUTFChars failed; do not hand a null pattern to RE2.
        return 0;
    }

    RE2 *pointer = new RE2(str, options);

    // RE2 keeps its own copy of the pattern, so the JNI buffer can be given
    // back right away. Releasing here covers the success and failure paths
    // alike (it used to be skipped when compilation failed).
    env->ReleaseStringUTFChars(j_str, str);

    if (!pointer->ok()) {
        // Raise the Java exception before deleting: the message is owned by
        // the RE2 object.
        throw_RegExprException(env, pointer->error().c_str());
        delete pointer;
        return 0;
    }

    jlong j_pointer = reinterpret_cast<jlong>(pointer);
    assert(reinterpret_cast<RE2*>(j_pointer) == pointer);
    return j_pointer;
}
/*
 * Serialization hook for the regex custom block.
 *
 * Writes, in order:
 *   - a 4-byte integer: pattern length including the trailing NUL
 *   - the pattern bytes (including the trailing NUL)
 *   - an 8-byte integer: the max_mem option
 *   - a 2-byte integer: bitfield_of_options() of the remaining options
 *
 * Fails with caml_failwith when the pattern (plus NUL) is longer than
 * INT_MAX. On return *wsize_32 is 4 and *wsize_64 is 8.
 */
void mlre2__custom_regex_serialize(value v, unsigned long * wsize_32, unsigned long * wsize_64)
{
  RE2 *regex = Regex_val(v);
  const std::string &pattern_text = regex->pattern();
  const size_t byte_count = pattern_text.length() + 1;

  if (byte_count > INT_MAX) {
    caml_failwith("cannot serialize regexes with patterns longer than INT_MAX");
  }

  caml_serialize_int_4(static_cast<signed int>(byte_count));
  caml_serialize_block_1(const_cast<char *>(pattern_text.c_str()), byte_count);
  caml_serialize_int_8(regex->options().max_mem());
  caml_serialize_int_2(bitfield_of_options(regex->options()));

#ifdef DEBUG
  std::cerr << "serialized regex /" << Regex_val(v)->pattern() << "/ (length "
            << byte_count << ")" << std::endl;
#endif

  *wsize_32 = 4;
  *wsize_64 = 8;
}
/* returns (cre2__obj_t * int * (string * int) list) where * - cre2__obj_t is the ML-side name for a custom_block with a struct regex * * - int is the number of submatches, including the whole match * - (string * int) list is the Map.to_alist of the submatch (name, index) Map.t */ CAMLprim value mlre2__create_re(value v_options, value v_pattern) { value v_retval, v_compile_error; const char * c_pat = String_val(v_pattern); RE2::Options opt; RE2* compiled = NULL; opt.Copy(RE2::Quiet); while (v_options != Val_emptylist) { int val = Int_val(Field(Field(v_options, 0), 0)); switch (Tag_val(Field(v_options, 0))) { #define X(_u,FIRST,REST,_uu) case FIRST##REST : opt.set_##FIRST##REST(val); break; #define X__ENCODING(_u,FIRST,REST,_uu,SUFFIX,_uuu,TRANSLATED) \ case FIRST##REST##SUFFIX : opt.set_##FIRST##REST(val TRANSLATED); break; #define X__MAXMEM(_u,FIRST,REST,_uu) X(_u,FIRST,REST,_uu) #include "enum_x_macro.h" default : caml_invalid_argument("invalid option\n"); } v_options = Field(v_options, 1); } compiled = new RE2(c_pat, opt); if (!compiled->ok()) { /* Warning from this point on it's no longer safe to access v_options or v_pattern as the GC might be invoked from caml_copy_string and move those values (as we haven't registered the paramters they wouldn't get updated). This is fine because we don't access them before we call caml_raise_with_arg. */ v_compile_error = caml_copy_string(compiled->error().c_str()); delete compiled; compiled = NULL; caml_raise_with_arg(*caml_named_value("mlre2__Regex_compile_failed"), v_compile_error); } v_retval = caml_alloc_custom(&mlre2__custom_regex_ops, sizeof(compiled), 1024*1024, /* RE2 object uses ~1MB of memory outside the OCaml heap */ 500*1024*1024); /* I'm okay with 500MB of RAM being wasted */ Regex_val(v_retval) = compiled; return v_retval; }
/**
 * JNI entry point: runs an unanchored match and reports every group's
 * offsets to the Java matcher through the static RE2Matcher.addGroup.
 *
 * @param env          JNI environment
 * @param cls          calling class (unused)
 * @param matcher      RE2Matcher instance passed back to addGroup
 * @param re2_pointer  RE2* packed into a jlong
 * @param str_pointer  char* of the NUL-terminated subject, packed into a jlong
 * @param ngroups      number of groups to extract (including the whole match)
 * @param start        start offset within the subject
 * @param end          end offset within the subject
 * @return whether the pattern matched. JNI_FALSE is returned, with a Java
 *         exception pending, when RE2Matcher.addGroup cannot be resolved.
 *
 * For each group, addGroup receives (start, end) byte offsets relative to
 * the subject, or (-1, -1) when the group did not take part in the match.
 */
JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2Matcher_findImpl
  (JNIEnv *env, jclass cls, jobject matcher, jlong re2_pointer, jlong str_pointer, jint ngroups, jint start, jint end)
{
    RE2 *regex = reinterpret_cast<RE2*>(re2_pointer);
    char *str = reinterpret_cast<char*>(str_pointer);

    // Small group counts use the stack buffer; larger ones go to the heap.
    StringPiece stackgroups[stackSize];
    StringPiece* heapgroups = NULL;
    StringPiece* groups = stackgroups;
    if (ngroups > stackSize) {
        heapgroups = new StringPiece[ngroups];
        groups = heapgroups;
    }

    StringPiece text(str);
    const bool res = regex->Match(text, start, end, RE2::UNANCHORED, groups, ngroups);

    if (res) {
        jclass matcher_class = env->FindClass("com/logentries/re2/RE2Matcher");
        jmethodID addID = NULL;
        if (matcher_class != NULL) {
            addID = env->GetStaticMethodID(matcher_class, "addGroup", "(Lcom/logentries/re2/RE2Matcher;II)V");
        }
        if (addID == NULL) {
            // The failed lookup left an exception pending; just clean up.
            delete[] heapgroups;
            return JNI_FALSE;
        }

        for (int i = 0; i < ngroups; i++) {
            jint group_start = -1;
            jint group_end = -1;

            // Test the data pointer, not the piece's value: an empty but
            // matched group compares equal to a null piece and would be
            // misreported as "did not participate".
            if (groups[i].data() != NULL) {
                group_start = static_cast<jint>(groups[i].data() - str);
                group_end = static_cast<jint>(groups[i].data() - str + groups[i].size());
            }

            // addGroup returns void, so the Void call variant is required.
            env->CallStaticVoidMethod(matcher_class, addID, matcher, group_start, group_end);

            // Do not keep calling into Java with an exception pending.
            if (env->ExceptionCheck()) {
                break;
            }
        }
    }

    delete[] heapgroups;
    return static_cast<jboolean>(res);
}
/**
 * JNI entry point: returns the names of the pattern's named capture groups
 * as a java.util.ArrayList of strings, in ascending group-index order.
 *
 * @param env        JNI environment
 * @param cls        calling class (unused)
 * @param j_pointer  RE2* packed into a jlong
 * @param j_args     unused
 * @return the new ArrayList, or NULL (with a Java exception pending) when a
 *         JNI lookup, allocation or call fails.
 */
JNIEXPORT jobject JNICALL Java_com_logentries_re2_RE2_getCaptureGroupNamesImpl
  (JNIEnv *env, jclass cls, jlong j_pointer, jobjectArray j_args)
{
    RE2 *pointer = reinterpret_cast<RE2*>(j_pointer);

    jclass j_array_list = env->FindClass("java/util/ArrayList");
    if (j_array_list == NULL) return NULL;

    jmethodID arrayListCtor = env->GetMethodID(j_array_list, "<init>", "()V");
    if (arrayListCtor == NULL) return NULL;

    jmethodID add = env->GetMethodID(j_array_list, "add", "(Ljava/lang/Object;)Z");
    if (add == NULL) return NULL;

    jobject java_array_list = env->NewObject(j_array_list, arrayListCtor);
    if (java_array_list == NULL) return NULL;

    // Bind by reference: the map is only read here, so no copy is needed.
    const map<int, string>& groupNames = pointer->CapturingGroupNames();

    for (map<int, string>::const_iterator it = groupNames.begin(); it != groupNames.end(); ++it) {
        jstring jvalue = env->NewStringUTF(it->second.c_str());
        if (jvalue == NULL) return NULL;

        // ArrayList.add returns boolean, so the Boolean call variant is
        // required.
        env->CallBooleanMethod(java_array_list, add, jvalue);

        // Drop the per-iteration local reference so patterns with many
        // named groups cannot exhaust the local reference table.
        env->DeleteLocalRef(jvalue);

        if (env->ExceptionCheck()) return NULL;
    }

    return java_array_list;
}
/**
 * JNI entry point: reports how many capturing groups the pattern has.
 *
 * @param env          JNI environment (unused)
 * @param cls          calling class (unused)
 * @param re2_pointer  RE2* packed into a jlong
 * @return the value of RE2::NumberOfCapturingGroups() for that object
 */
JNIEXPORT jint JNICALL Java_com_logentries_re2_RE2_numberOfCapturingGroupsImpl
  (JNIEnv *env, jclass cls, jlong re2_pointer)
{
    const int group_count =
        reinterpret_cast<RE2*>(re2_pointer)->NumberOfCapturingGroups();
    return static_cast<jint>(group_count);
}