Exemple #1
0
	RE2Regex(const std::string& rx) : Regex(rx), regexcl(rx, RE2::Quiet)
	{
		if (!regexcl.ok())
		{
			throw RegexException(rx, regexcl.error());
		}
	}
Exemple #2
0
// Matches `item` in full against `pattern` and, on success, hands the captured
// groups to replace_matches() together with `target` and the '@' character.
//
// Returns `item` unchanged when:
//   - the pattern does not fully match `item`, or
//   - the pattern's capturing-group count is negative (RE2 reports -1 for a
//     pattern that failed to compile) or exceeds the fixed capture buffers.
//     Passing such a count to FullMatchN would make it read past `args`.
std::string replace_with_pattern(const std::string& item, const RE2& pattern, const std::string& target) {
  enum { kMaxGroups = 10 };

  const int group_count = pattern.NumberOfCapturingGroups();
  if (group_count < 0 || group_count > kMaxGroups)
    return item;

  // arg[i] receives the text of group i; argv[i] is the RE2 adapter that
  // writes into it; args[i] is the pointer table FullMatchN expects.
  std::string arg[kMaxGroups];
  RE2::Arg argv[kMaxGroups];
  const RE2::Arg* args[kMaxGroups];
  for (int i = 0; i < kMaxGroups; i++) {
    argv[i] = &arg[i];
    args[i] = &argv[i];
  }

  if (!RE2::FullMatchN(item, pattern, args, group_count))
    return item;

  return replace_matches(target, arg, '@', group_count);
}
Exemple #3
0
// Runs `re` (unanchored) over the first `slen` bytes of `s`, beginning at
// `startoffset`, and writes submatch offsets into `ovector`.
//
// `ovector` is treated as holding ovecsize / 3 submatch slots; slot i occupies
// ovector[2*i] (start offset) and ovector[2*i + 1] (end offset), both relative
// to `s`. A group that did not participate in the match is stored as -1/-1.
//
// Returns:
//   -1  when there is no match (also for a negative `startoffset` or a regex
//       whose capturing-group count is negative);
//   N   (> 0) when every submatch up to the last participating one fits in
//       `ovector`; N is that number of slots;
//   otherwise `ovector` was too small: the number of leading slots up to the
//       last participating group that fits when the final available slot is a
//       non-participating group, and 0 in all other cases (including an
//       `ovector` with no room for a single slot).
//
// NOTE: `submatches` is a variable-length array, a compiler extension in C++.
int msc_regexec_ex(RE2 &re, const char *s, unsigned int slen, 
                   int startoffset, int *ovector, int ovecsize)
{
        // Total # of submatches in the regex pattern (whole match included)
        const int num_submatch = 1 + re.NumberOfCapturingGroups();

        // A negative offset cannot start a match, and a regex without even
        // the whole-match slot cannot match anything.
        if (startoffset < 0 || num_submatch < 1) {
                return -1;
        }

        const size_t startpos = startoffset;
        const size_t endpos = slen;
        re2::StringPiece submatches[num_submatch];

        // Build the text from the explicit length so that the search does not
        // depend on `s` being NUL-terminated.
        const re2::StringPiece text(s, slen);

        // If the string does not match the pattern
        if (!re.Match(text, startpos, endpos, RE2::UNANCHORED, submatches, num_submatch)) {
                return -1;
        }

        // Find the last participating submatch; the lower bound keeps the
        // scan inside the array even if no slot carries data.
        int last_nonempty_submatch = num_submatch - 1;
        while (last_nonempty_submatch >= 0 &&
               !submatches[last_nonempty_submatch].data()) {
                last_nonempty_submatch--;
        }

        const int used = last_nonempty_submatch + 1;
        // Number of slots the caller's vector can hold (never negative)
        const int capacity = ovecsize > 0 ? ovecsize / 3 : 0;
        const int count = used < capacity ? used : capacity;

        // Extract submatch information as much as possible
        for (int i = 0; i < count; i++) {
                if (!submatches[i].data()) {
                        // Group did not participate in the match
                        ovector[2 * i] = -1;
                        ovector[2 * i + 1] = -1;
                } else {
                        ovector[2 * i] = static_cast<int>(submatches[i].data() - s);
                        ovector[2 * i + 1] = ovector[2 * i] +
                                static_cast<int>(submatches[i].length());
                }
        }

        // The output vector has enough space for every slot up to the last
        // participating submatch
        if (used <= capacity) {
                return used;
        }

        // The vector was too small. Drop non-participating groups at the tail
        // of what was stored; with no capacity at all there is nothing to scan.
        if (capacity > 0 && !submatches[capacity - 1].data()) {
                for (int i = capacity - 2; i >= 0; i--) {
                        if (submatches[i].data()) {
                                return i + 1;
                        }
                }
        }

        return 0;
}
Exemple #4
0
/*
 * Compiles the pattern held in `j_str` with the options described by
 * `j_options` and returns the address of the new RE2 object as a jlong.
 *
 * On a compile error the RE2 object is destroyed, throw_RegExprException() is
 * invoked with RE2's error text, and 0 is returned. 0 is also returned when
 * the JVM cannot provide the string's UTF characters.
 */
JNIEXPORT jlong JNICALL Java_com_logentries_re2_RE2_compileImpl
  (JNIEnv *env, jclass cls, jstring j_str, jobject j_options) {
    Options options(env, j_options);
    const char *str = env->GetStringUTFChars(j_str, 0);
    if (str == NULL) {
        // GetStringUTFChars failed; the JVM has already raised the error.
        return 0;
    }

    RE2 *pointer = new RE2(str, options);
    // RE2 keeps its own copy of the pattern, so the JNI buffer can be handed
    // back right away. Releasing it here covers the success and the failure
    // path alike.
    env->ReleaseStringUTFChars(j_str, str);

    if (!pointer->ok()) {
        throw_RegExprException(env, pointer->error().c_str());
        delete pointer;
        return 0;
    }

    jlong j_pointer = reinterpret_cast<jlong>(pointer);
    // The handle must survive the round trip through jlong.
    assert(reinterpret_cast<RE2*>(j_pointer) == pointer);
    return j_pointer;
}
Exemple #5
0
  /* Serialization hook for the custom block wrapping an RE2 object.
   *
   * Writes, in this order:
   *   - a 4-byte integer: pattern length including the terminating NUL
   *   - that many bytes: the pattern text with its terminating NUL
   *   - an 8-byte integer: the max_mem option
   *   - a 2-byte integer: bitfield_of_options() of the regex options
   *
   * Fails via caml_failwith when the length does not fit in an int.
   * Reports 4 and 8 through wsize_32 / wsize_64 respectively.
   */
  void mlre2__custom_regex_serialize(value v, unsigned long * wsize_32,
                                     unsigned long * wsize_64) {
    RE2 *regex = Regex_val(v);
    const std::string &pattern = regex->pattern();
    const size_t len = pattern.length() + 1;  /* + 1 for the NUL byte */

    if (len > INT_MAX) {
      caml_failwith("cannot serialize regexes with patterns longer than INT_MAX");
    }

    caml_serialize_int_4(static_cast<signed int>(len));
    caml_serialize_block_1(const_cast<char *>(pattern.c_str()), len);

    const RE2::Options &opts = regex->options();
    caml_serialize_int_8(opts.max_mem());
    caml_serialize_int_2(bitfield_of_options(opts));
#ifdef DEBUG
    std::cerr << "serialized regex /" << pattern << "/ (length "
      << len << ")" << std::endl;
#endif
    *wsize_32 = 4;
    *wsize_64 = 8;
  }
Exemple #6
0
  /* Compiles v_pattern into an RE2 object configured from v_options and
   * returns it wrapped in an OCaml custom block.
   *
   * v_options is walked as an OCaml list; each element is a block whose tag
   * selects the RE2 option to set and whose first field holds the value.
   * Raises Invalid_argument for an unknown tag, and the exception registered
   * as "mlre2__Regex_compile_failed" (with RE2's error text as argument) when
   * the pattern does not compile.
   *
   * Original note on the ML-side result, kept for reference:
   * returns (cre2__obj_t * int * (string * int) list) where
   * - cre2__obj_t is the ML-side name for a custom_block with a struct regex *
   * - int is the number of submatches, including the whole match
   * - (string * int) list is the Map.to_alist of the submatch (name, index) Map.t
   * NOTE(review): the code visible here returns only the custom block, not a
   * tuple -- confirm where the other components are produced.
   */
  CAMLprim value mlre2__create_re(value v_options, value v_pattern) {
    value v_retval, v_compile_error;
    const char * c_pat = String_val(v_pattern);
    RE2::Options opt;
    RE2* compiled = NULL;

    /* Start from the RE2::Quiet preset, then apply each requested option. */
    opt.Copy(RE2::Quiet);
    while (v_options != Val_emptylist) {
      /* Head of the list: field 0 of the element carries the option value. */
      int val = Int_val(Field(Field(v_options, 0), 0));
      switch (Tag_val(Field(v_options, 0))) {
      /* The case labels are generated from enum_x_macro.h: one case per
         option, each calling the matching opt.set_<name>() setter. */
#define X(_u,FIRST,REST,_uu) case FIRST##REST : opt.set_##FIRST##REST(val); break;
#define X__ENCODING(_u,FIRST,REST,_uu,SUFFIX,_uuu,TRANSLATED)               \
        case FIRST##REST##SUFFIX : opt.set_##FIRST##REST(val TRANSLATED); break;
#define X__MAXMEM(_u,FIRST,REST,_uu) X(_u,FIRST,REST,_uu)
#include "enum_x_macro.h"
      default              : caml_invalid_argument("invalid option\n");
      }
      /* Advance to the tail of the list. */
      v_options = Field(v_options, 1);
    }

    compiled = new RE2(c_pat, opt);

    if (!compiled->ok()) {
      /* Warning:
         from this point on it is no longer safe to access v_options or
         v_pattern, because the GC might be invoked from caml_copy_string and
         move those values (we have not registered the parameters, so they
         would not get updated).  This is fine because we do not access
         them before we call caml_raise_with_arg. */
      v_compile_error = caml_copy_string(compiled->error().c_str());
      /* Free the failed regex before raising; the raise does not return. */
      delete compiled;
      compiled = NULL;
      caml_raise_with_arg(*caml_named_value("mlre2__Regex_compile_failed"),
          v_compile_error);
    }

    /* The custom block stores just the pointer; the two trailing arguments
       are the hints passed to caml_alloc_custom about memory held outside
       the OCaml heap. */
    v_retval = caml_alloc_custom(&mlre2__custom_regex_ops, sizeof(compiled),
        1024*1024,      /* RE2 object uses ~1MB of memory outside the OCaml heap */
        500*1024*1024);  /* I'm okay with 500MB of RAM being wasted */

    Regex_val(v_retval) = compiled;

    return v_retval;
  }
Exemple #7
0
/*
 * Searches the native string at `str_pointer` with the RE2 object at
 * `re2_pointer` (unanchored, between `start` and `end`) and, on a match,
 * reports every group to the Java side through the static method
 * RE2Matcher.addGroup(RE2Matcher, int, int).
 *
 * Each group is reported as (start offset, end offset) relative to the start
 * of the string; a group that did not take part in the match is reported as
 * (-1, -1). An empty group that did take part is reported with equal start and
 * end offsets.
 *
 * Returns whether the regex matched. If the class or method lookup fails, or a
 * callback raises, the loop stops and the pending Java exception is left for
 * the caller.
 */
JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2Matcher_findImpl
  (JNIEnv *env, jclass cls, jobject matcher, jlong re2_pointer, jlong str_pointer, jint ngroups, jint start, jint end) {

    RE2 *regex = reinterpret_cast<RE2*>(re2_pointer);
    char *str = reinterpret_cast<char*>(str_pointer);

    // Use the on-stack buffer when it is large enough; otherwise fall back to
    // a heap array that is freed before returning.
    StringPiece stackgroups[stackSize];
    StringPiece* heapgroups = NULL;
    StringPiece* groups = stackgroups;
    if (ngroups > stackSize) {
        heapgroups = new StringPiece[ngroups];
        groups = heapgroups;
    }

    StringPiece text(str);
    const bool res = regex->Match(text, start, end, RE2::UNANCHORED, groups, ngroups);
    if (res) {
        jclass matcher_class = env->FindClass("com/logentries/re2/RE2Matcher");
        jmethodID addID = NULL;
        if (matcher_class != NULL) {
            addID = env->GetStaticMethodID(matcher_class, "addGroup", "(Lcom/logentries/re2/RE2Matcher;II)V");
        }

        // addID stays NULL when either lookup failed; a Java exception is
        // pending in that case, so no callback may be issued.
        for (int i = 0; addID != NULL && i < ngroups; i++) {
            jint from = -1;
            jint to = -1;
            // A NULL data pointer marks a group that did not participate.
            // Comparing the StringPiece itself against NULL would also treat
            // empty participating groups as missing.
            if (groups[i].data() != NULL) {
                from = static_cast<jint>(groups[i].data() - str);
                to = static_cast<jint>(groups[i].data() - str + groups[i].size());
            }

            // addGroup is declared to return void, so it must be invoked
            // through the Void call family.
            env->CallStaticVoidMethod(matcher_class, addID, matcher, from, to);
            if (env->ExceptionCheck()) {
                break;
            }
        }
    }

    delete[] heapgroups;
    return static_cast<jboolean>(res);
}
Exemple #8
0
/*
 * Builds a java.util.ArrayList containing the names of the capturing groups of
 * the RE2 object at `j_pointer`, in the iteration order of the map returned by
 * CapturingGroupNames() (keyed by group index).
 *
 * Returns NULL when a JNI lookup, allocation or call fails; the Java exception
 * raised by the failing JNI call is left pending. `j_args` is not used.
 */
JNIEXPORT jobject JNICALL Java_com_logentries_re2_RE2_getCaptureGroupNamesImpl
  (JNIEnv *env, jclass cls, jlong j_pointer, jobjectArray j_args) {
    RE2 *pointer = reinterpret_cast<RE2*>(j_pointer);

    jclass j_array_list = env->FindClass("java/util/ArrayList");
    if (j_array_list == NULL) return NULL;

    jmethodID arrayListCtor = env->GetMethodID(j_array_list, "<init>", "()V");
    if (arrayListCtor == NULL) return NULL;

    jmethodID add = env->GetMethodID(j_array_list, "add", "(Ljava/lang/Object;)Z");
    if (add == NULL) return NULL;

    jobject java_array_list = env->NewObject(j_array_list, arrayListCtor);
    if (java_array_list == NULL) return NULL;

    // Bind by reference: the map is only read, so no copy is needed.
    const map<int, string>& groupNames = pointer->CapturingGroupNames();

    for (map<int, string>::const_iterator it = groupNames.begin();
         it != groupNames.end(); ++it) {
        jstring jvalue = env->NewStringUTF(it->second.c_str());
        if (jvalue == NULL) return NULL;

        // ArrayList.add returns boolean, so it is called through the Boolean
        // call family; the result itself is not needed.
        env->CallBooleanMethod(java_array_list, add, jvalue);
        // Release the per-iteration local reference right away so that the
        // number of live local references does not grow with the group count.
        env->DeleteLocalRef(jvalue);

        if (env->ExceptionCheck()) return NULL;
    }

    return java_array_list;
}
Exemple #9
0
/*
 * Returns NumberOfCapturingGroups() of the RE2 object whose address was
 * passed from Java as `re2_pointer`.
 */
JNIEXPORT jint JNICALL Java_com_logentries_re2_RE2_numberOfCapturingGroupsImpl
  (JNIEnv *env, jclass cls, jlong re2_pointer) {
    const int group_count =
        reinterpret_cast<RE2*>(re2_pointer)->NumberOfCapturingGroups();
    return static_cast<jint>(group_count);
}