Ejemplo n.º 1
0
void
rspamd_regexp_library_init (void)
{
	if (global_re_cache == NULL) {
		global_re_cache = rspamd_regexp_cache_new ();
#ifdef HAVE_PCRE_JIT
		gint jit, rc;
		const gchar *str;


		rc = pcre_config (PCRE_CONFIG_JIT, &jit);

		if (rc == 0 && jit == 1) {
			pcre_config (PCRE_CONFIG_JITTARGET, &str);

			msg_info ("pcre is compiled with JIT for %s", str);

			can_jit = TRUE;
		}
		else {
			msg_info ("pcre is compiled without JIT support, so many optimisations"
					" are impossible");
		}
#else
		msg_info ("pcre is too old and has no JIT support, so many optimisations"
				" are impossible");
#endif
	}
}
Ejemplo n.º 2
0
static char *
ngx_regex_pcre_jit(ngx_conf_t *cf, void *post, void *data)
{
    ngx_flag_t  *fp = data;

    if (*fp == 0) {
        return NGX_CONF_OK;
    }

#if (NGX_HAVE_PCRE_JIT)
    {
    int  jit, r;

    jit = 0;
    r = pcre_config(PCRE_CONFIG_JIT, &jit);

    if (r != 0 || jit != 1) {
        ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
                           "PCRE library does not support JIT");
        *fp = 0;
    }
    }
#else
    ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
                       "nginx was built without PCRE JIT support");
    *fp = 0;
#endif

    return NGX_CONF_OK;
}
Ejemplo n.º 3
0
HMODULE LoadPcreLibrary(const char *szPath)
{
	_ASSERT(szPath);
	HMODULE hModule = LoadLibraryA(szPath);
	if (!hModule)
	{
		return NULL;
	}
	*(FARPROC*)&pcre_config = GetProcAddress(hModule, "pcre_config");
	*(FARPROC*)&pcre_compile = GetProcAddress(hModule, "pcre_compile");
	*(FARPROC*)&pcre_exec = GetProcAddress(hModule, "pcre_exec");
	*(FARPROC*)&pcre_study = GetProcAddress(hModule, "pcre_study");
	*(FARPROC*)&pcre_free = *(FARPROC*)GetProcAddress(hModule, "pcre_free"); // pcre_free is a pointer to a variable containing pointer to the function %)
	if (pcre_compile && pcre_exec && pcre_free)
	{

		int Utf8Supported = 0;
		if (pcre_config)
		{
			pcre_config(PCRE_CONFIG_UTF8, &Utf8Supported);
		}
		if (Utf8Supported)
		{
			return hModule;
		}

	}
	FreeLibrary(hModule);
	return NULL;
}
Ejemplo n.º 4
0
Archivo: pcre.c Proyecto: Lujaw/zsh
static int
zpcre_utf8_enabled(void)
{
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
    static int have_utf8_pcre = -1;

    /* value can toggle based on MULTIBYTE, so don't
     * be too eager with caching */
    if (have_utf8_pcre < -1)
	return 0;

    if (!isset(MULTIBYTE))
	return 0;

    if ((have_utf8_pcre == -1) &&
        (!strcmp(nl_langinfo(CODESET), "UTF-8"))) {

	if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))
	    have_utf8_pcre = -2; /* erk, failed to ask */
    }

    if (have_utf8_pcre < 0)
	return 0;
    return have_utf8_pcre;

#else
    return 0;
#endif
}
Ejemplo n.º 5
0
int luaopen_cs_utf8_regexp(lua_State *L) {
  int utf8_supported = 0;
  pcre_config(PCRE_CONFIG_UTF8, &utf8_supported);
  if (utf8_supported == 0)
    return luaL_error(L, "PCRE_CONFIG_UTF8_DISABLED");

  luaL_newmetatable(L, MR_MTBL_NAME);
  luaL_register(L, NULL, mr_methods);
  lua_setfield(L, -1, "__index");

  lua_createtable(L, 0, ARRAY_SIZE(mr_functions) - 1);
  luaL_register(L, NULL, mr_functions);
  lua_setfield(L, -2, "MatchResult");

  luaL_newmetatable(L, RE_MTBL_NAME);
  luaL_register(L, NULL, re_methods);
  lua_setfield(L, -1, "__index");

  lua_createtable(L, 0, ARRAY_SIZE(re_functions) - 1);
  luaL_register(L, NULL, re_functions);

  RE_OPTION_MAP(RE_OPTION_GEN);

  lua_setfield(L, -2, "regexp");

  return 1;
}
Ejemplo n.º 6
0
  void moduleInit() override {
    Native::registerConstant<KindOfString>(
      s_PCRE_VERSION.get(), makeStaticString(pcre_version())
    );

#define PCRECNS(c) Native::registerConstant<KindOfInt64> \
                    (makeStaticString("PREG_" #c), PHP_PCRE_##c);
    PCRECNS(NO_ERROR);
    PCRECNS(INTERNAL_ERROR);
    PCRECNS(BACKTRACK_LIMIT_ERROR);
    PCRECNS(RECURSION_LIMIT_ERROR);
    PCRECNS(BAD_UTF8_ERROR);
    PCRECNS(BAD_UTF8_OFFSET_ERROR);
#undef PCRECNS
#define PREGCNS(c) Native::registerConstant<KindOfInt64> \
                    (makeStaticString("PREG_" #c), PREG_##c);
    PREGCNS(PATTERN_ORDER);
    PREGCNS(SET_ORDER);
    PREGCNS(OFFSET_CAPTURE);

    PREGCNS(SPLIT_NO_EMPTY);
    PREGCNS(SPLIT_DELIM_CAPTURE);
    PREGCNS(SPLIT_OFFSET_CAPTURE);

    PREGCNS(GREP_INVERT);
#undef PREGCNS

    HHVM_FE(preg_filter);
    HHVM_FE(preg_grep);
    HHVM_FE(preg_match);
    HHVM_FE(preg_match_all);
    HHVM_FE(preg_replace);
    HHVM_FE(preg_replace_callback);
    HHVM_FE(preg_replace_callback_array);
    HHVM_FE(preg_split);
    HHVM_FE(preg_quote);
    HHVM_FE(preg_last_error);
    HHVM_FE(ereg_replace);
    HHVM_FE(eregi_replace);
    HHVM_FE(ereg);
    HHVM_FE(eregi);
    HHVM_FE(split);
    HHVM_FE(spliti);
    HHVM_FE(sql_regcase);

    loadSystemlib();

    pcre_config(PCRE_CONFIG_JIT, &s_pcre_has_jit);
    IniSetting::Bind(this, IniSetting::PHP_INI_ONLY,
                     "hhvm.pcre.jit",
                     &s_pcre_has_jit);
  }
Ejemplo n.º 7
0
int main(void)
{
	int jit = 0;
#ifdef SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JIT, &jit);
#else
	pcre16_config(PCRE_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}
	return regression_tests();
}
Ejemplo n.º 8
0
/*---------------------------------------------------------------------------*/
  { NULL, 0 }
};

int Lpcre_config (lua_State *L);	/* XXX gcc warning */
int Lpcre_config (lua_State *L) {
  int val;
  flag_pair *fp;
  if (lua_istable (L, 1))
    lua_settop (L, 1);
  else
    lua_newtable (L);
  for (fp = pcre_config_flags; fp->key; ++fp) {
    if (0 == pcre_config (fp->val, &val)) {
      lua_pushinteger (L, val);
      lua_setfield (L, -2, fp->key);
    }
  }
  return 1;
}
Ejemplo n.º 9
0
	bool Regex::startup(string& startup_errors) {
		string err;
		if ( ! loadattempted ) {
			loadattempted = true;
			loaded = false;
			string libdir,libstr;
			if (!Environment::getbenv("OBYX_LIBPCRESO",libstr)) { 	//legacy method
				if (Environment::getbenv("OBYX_LIBPCREDIR",libdir)) {
					if (!libdir.empty() && *libdir.rbegin() != '/') libdir.push_back('/');
				}
				libstr = SO(libdir,libpcre);
			}
			pcre_lib_handle = dlopen(libstr.c_str(),RTLD_GLOBAL | RTLD_NOW);
			dlerr(err); //debug only.
			if (err.empty() && pcre_lib_handle != nullptr ) {
				pcre_compile = (pcre* (*)(const char*, int, const char**, int*,const unsigned char*)) dlsym(pcre_lib_handle,"pcre_compile"); dlerr(err);
				pcre_exec = (int (*)(const pcre*,const pcre_extra*,PCRE_SPTR,int,int,int,int*,int)) dlsym(pcre_lib_handle,"pcre_exec"); dlerr(err);
				pcre_config = (int (*)(int,void *)) dlsym(pcre_lib_handle,"pcre_config");  dlerr(err);
				pcre_study = (pcre_extra* (*)(const pcre*,int,const char**)) dlsym(pcre_lib_handle,"pcre_study");  dlerr(err);
				pcre_free = (void (*)(void *)) dlsym(pcre_lib_handle,"pcre_free");  dlerr(err);
				if ( err.empty() ) {
					if ( pcre_config != nullptr && pcre_compile != nullptr && pcre_exec!=nullptr) {
						int locr = 0;
						int dobo = pcre_config(PCRE_CONFIG_UTF8, &locr);
						if (locr != 1 || dobo != 0) { //dobo means that the flag is not supported...
							if (Logger::debugging()) {
								*Logger::log << Log::info << "Regex::startup() The pcre library was loaded, but utf-8 appears not to be supported." << Log::LO << Log::blockend;
							}
						}
						loaded = true;
					}
				} 
			}
			startup_errors.append(err);
		}
		return loaded;
	}
Ejemplo n.º 10
0
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	const char *error;
	int erroffset;
	int options = PCRE_MULTILINE;

	if (opt->ignore_case) {
		if (has_non_ascii(p->pattern))
			p->pcre1_tables = pcre_maketables();
		options |= PCRE_CASELESS;
	}
	if (is_utf8_locale() && has_non_ascii(p->pattern))
		options |= PCRE_UTF8;

	p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
				      p->pcre1_tables);
	if (!p->pcre1_regexp)
		compile_regexp_failed(p, error);

	p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &error);
	if (!p->pcre1_extra_info && error)
		die("%s", error);

#ifdef GIT_PCRE1_USE_JIT
	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
	if (p->pcre1_jit_on == 1) {
		p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		if (!p->pcre1_jit_stack)
			die("Couldn't allocate PCRE JIT stack");
		pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
	} else if (p->pcre1_jit_on != 0) {
		die("BUG: The pcre1_jit_on variable should be 0 or 1, not %d",
		    p->pcre1_jit_on);
	}
#endif
}
Ejemplo n.º 11
0
	inline int config(int what) const {
		int where, result(pcre_config(what, &where));
		return result >= 0 ? where : result;
	}
Ejemplo n.º 12
0
int main(int argc, char **argv) {
    char **paths = NULL;
    int i;
    int pcre_opts = PCRE_MULTILINE;
    int study_opts = 0;
    double time_diff;
    pthread_t *workers = NULL;
    int workers_len;

    set_log_level(LOG_LEVEL_WARN);

    work_queue = NULL;
    work_queue_tail = NULL;
    memset(&stats, 0, sizeof(stats));
    root_ignores = init_ignore(NULL);
    out_fd = stdout;
#ifdef USE_PCRE_JIT
    int has_jit = 0;
    pcre_config(PCRE_CONFIG_JIT, &has_jit);
    if (has_jit) {
        study_opts |= PCRE_STUDY_JIT_COMPILE;
    }
#endif

    gettimeofday(&(stats.time_start), NULL);

    parse_options(argc, argv, &paths);
    log_debug("PCRE Version: %s", pcre_version());

    if (opts.workers) {
        workers_len = opts.workers;
    } else {
        /* Experiments show that two worker threads appear to be optimal, both
         * on dual-core and quad-core systems. See
         * http://geoff.greer.fm/2012/09/07/the-silver-searcher-adding-pthreads/.
         * On single-core CPUs, more than one worker thread makes no sense.
         */
        int ncpus = (int)sysconf(_SC_NPROCESSORS_ONLN);
        workers_len = (ncpus >= 2) ? 2 : 1;
    }
    log_debug("Using %i workers", workers_len);
    done_adding_files = FALSE;
    workers = ag_calloc(workers_len, sizeof(pthread_t));
    if (pthread_cond_init(&files_ready, NULL)) {
        log_err("pthread_cond_init failed!");
        exit(2);
    }
    if (pthread_mutex_init(&print_mtx, NULL)) {
        log_err("pthread_mutex_init failed!");
        exit(2);
    }
    if (pthread_mutex_init(&stats_mtx, NULL)) {
        log_err("pthread_mutex_init failed!");
        exit(2);
    }
    if (pthread_mutex_init(&work_queue_mtx, NULL)) {
        log_err("pthread_mutex_init failed!");
        exit(2);
    }

    if (opts.casing == CASE_SMART) {
        opts.casing = contains_uppercase(opts.query) ? CASE_SENSITIVE : CASE_INSENSITIVE;
    }

    if (opts.literal) {
        if (opts.casing == CASE_INSENSITIVE) {
            /* Search routine needs the query to be lowercase */
            char *c = opts.query;
            for (; *c != '\0'; ++c) {
                *c = (char) tolower(*c);
            }
        }
        generate_skip_lookup(opts.query, opts.query_len, skip_lookup, opts.casing == CASE_SENSITIVE);
        if (opts.word_regexp) {
            init_wordchar_table();
            opts.literal_starts_wordchar = is_wordchar(opts.query[0]);
            opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]);
        }
    } else {
        if (opts.casing == CASE_INSENSITIVE) {
            pcre_opts = pcre_opts | PCRE_CASELESS;
        }
        if (opts.word_regexp) {
            char *word_regexp_query;
            asprintf(&word_regexp_query, "\\b%s\\b", opts.query);
            free(opts.query);
            opts.query = word_regexp_query;
            opts.query_len = strlen(opts.query);
        }
        compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts);
    }

    if (opts.search_stream) {
        search_stream(stdin, "");
    } else {
        for (i = 0; i < workers_len; i++) {
            int ptc_rc = pthread_create(&(workers[i]), NULL, &search_file_worker,
                    NULL);
            check_err(ptc_rc, "create worker thread");
        }
        for (i = 0; paths[i] != NULL; i++) {
            log_debug("searching path %s for %s", paths[i], opts.query);
            search_dir(root_ignores, paths[i], 0);
        }
        done_adding_files = TRUE;
        pthread_cond_broadcast(&files_ready);
        for (i = 0; i < workers_len; i++) {
            if (pthread_join(workers[i], NULL)) {
                log_err("pthread_join failed!");
                exit(2);
            }
        }
    }

    if (opts.stats) {
        gettimeofday(&(stats.time_end), NULL);
        time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
                    ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
        time_diff /= 1000000;

        printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff);
    }

    if (opts.pager) {
        pclose(out_fd);
    }
    pthread_cond_destroy(&files_ready);
    pthread_mutex_destroy(&work_queue_mtx);
    pthread_mutex_destroy(&stats_mtx);
    pthread_mutex_destroy(&print_mtx);
    cleanup_ignore(root_ignores);
    free(workers);
    free(paths);
    return 0;
}
Ejemplo n.º 13
0
  void moduleInit() override {
    HHVM_RC_STR(PCRE_VERSION, pcre_version());

    HHVM_RC_INT(PREG_NO_ERROR, PHP_PCRE_NO_ERROR);
    HHVM_RC_INT(PREG_INTERNAL_ERROR, PHP_PCRE_INTERNAL_ERROR);
    HHVM_RC_INT(PREG_BACKTRACK_LIMIT_ERROR, PHP_PCRE_BACKTRACK_LIMIT_ERROR);
    HHVM_RC_INT(PREG_RECURSION_LIMIT_ERROR, PHP_PCRE_RECURSION_LIMIT_ERROR);
    HHVM_RC_INT(PREG_BAD_UTF8_ERROR, PHP_PCRE_BAD_UTF8_ERROR);
    HHVM_RC_INT(PREG_BAD_UTF8_OFFSET_ERROR, PHP_PCRE_BAD_UTF8_OFFSET_ERROR);

    HHVM_RC_INT_SAME(PREG_PATTERN_ORDER);
    HHVM_RC_INT_SAME(PREG_SET_ORDER);
    HHVM_RC_INT_SAME(PREG_OFFSET_CAPTURE);

    HHVM_RC_INT_SAME(PREG_SPLIT_NO_EMPTY);
    HHVM_RC_INT_SAME(PREG_SPLIT_DELIM_CAPTURE);
    HHVM_RC_INT_SAME(PREG_SPLIT_OFFSET_CAPTURE);

    HHVM_RC_INT_SAME(PREG_GREP_INVERT);

#ifdef WNOHANG
    HHVM_RC_INT_SAME(WNOHANG);
#endif
#ifdef WUNTRACED
    HHVM_RC_INT_SAME(WUNTRACED);
#endif

#ifdef PRIO_PGRP
    HHVM_RC_INT_SAME(PRIO_PGRP);
#endif
#ifdef PRIO_USER
    HHVM_RC_INT_SAME(PRIO_USER);
#endif
#ifdef PRIO_PROCESS
    HHVM_RC_INT_SAME(PRIO_PROCESS);
#endif

    HHVM_FE(preg_filter);
    HHVM_FE(preg_grep);
    HHVM_FE(preg_match);
    HHVM_FE(preg_match_all);
    HHVM_FE(preg_replace);
    HHVM_FE(preg_replace_callback);
    HHVM_FE(preg_replace_callback_array);
    HHVM_FE(preg_split);
    HHVM_FE(preg_quote);
    HHVM_FE(preg_last_error);
    HHVM_FE(ereg_replace);
    HHVM_FE(eregi_replace);
    HHVM_FE(ereg);
    HHVM_FE(eregi);
    HHVM_FE(split);
    HHVM_FE(spliti);
    HHVM_FE(sql_regcase);

    loadSystemlib();

    pcre_config(PCRE_CONFIG_JIT, &s_pcre_has_jit);
    IniSetting::Bind(this, IniSetting::PHP_INI_ONLY,
                     "hhvm.pcre.jit",
                     &s_pcre_has_jit);
  }
Ejemplo n.º 14
0
static gboolean
log_matcher_pcre_re_compile(LogMatcher *s, const gchar *re, GError **error)
{
  LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;
  gint rc;
  const gchar *re_comp = re;
  const gchar *errptr;
  gint erroffset;
  gint flags = 0;
  gint optflags = 0;
 
  g_return_val_if_fail(error == NULL || *error == NULL, FALSE);

  if (self->super.flags & LMF_ICASE)
    flags |= PCRE_CASELESS;
#ifdef PCRE_NEWLINE_ANYCRLF
  if (self->super.flags & LMF_NEWLINE)
    flags |= PCRE_NEWLINE_ANYCRLF;
#else
  if (self->super.flags & LMF_NEWLINE)
    msg_warning("syslog-ng was compiled against an old PCRE which doesn't support the 'newline' flag", NULL);
#endif
  if (self->super.flags & LMF_UTF8)
    {
       gint support;
       flags |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
       self->match_options |= PCRE_NO_UTF8_CHECK;
 
       pcre_config(PCRE_CONFIG_UTF8, &support);
       if (!support)
         {
           g_set_error(error, LOG_TEMPLATE_ERROR, 0, "PCRE library is compiled without UTF8 support and utf8 flag was present");
           return FALSE;
         }

       pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &support);
       if (!support)
         {
           g_set_error(error, LOG_TEMPLATE_ERROR, 0, "PCRE library is compiled without UTF8 properties support and utf8 flag was present");
           return FALSE;
         }
    }
 
  /* complile the regexp */ 
  self->pattern = pcre_compile2(re_comp, flags, &rc, &errptr, &erroffset, NULL);
  if (!self->pattern)
    {
      g_set_error(error, LOG_TEMPLATE_ERROR, 0, "Error while compiling PCRE expression, error=%s, error_at=%d", errptr, erroffset);
      return FALSE;
    }

#ifdef PCRE_STUDY_JIT_COMPILE
  optflags = PCRE_STUDY_JIT_COMPILE;
#endif

  /* optimize regexp */
  self->extra = pcre_study(self->pattern, optflags, &errptr);
  if (errptr != NULL)
    {
      g_set_error(error, LOG_TEMPLATE_ERROR, 0, "Error while optimizing regular expression, error=%s", errptr);
      return FALSE;
    }

  return TRUE;
}
Ejemplo n.º 15
0
int main(int argc, char **argv) {
    set_log_level(LOG_LEVEL_WARN);

#ifdef AG_DEBUG
    set_log_level(LOG_LEVEL_DEBUG);
    int i = 0;
    for (i = 0; i < argc; i++) {
        fprintf(stderr, "%s ", argv[i]);
    }
    fprintf(stderr, "\n");
#endif

    char *query = NULL;
    char *path = NULL;
    int pcre_opts = 0;
    int study_opts = 0;
    const char *pcre_err = NULL;
    int pcre_err_offset = 0;
    pcre *re = NULL;
    pcre_extra *re_extra = NULL;
    struct timeval time_start, time_end;
    double time_diff = 0.0;

    gettimeofday(&time_start, NULL);

    parse_options(argc, argv, &query, &path);

    if (opts.casing == CASE_INSENSITIVE) {
        pcre_opts = pcre_opts | PCRE_CASELESS;
    }

    log_debug("PCRE Version: %s", pcre_version());

    if (!opts.literal) {
        re = pcre_compile(query, pcre_opts, &pcre_err, &pcre_err_offset, NULL);
        if (re == NULL) {
            log_err("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err);
            exit(1);
        }

#ifdef USE_PRCE_JIT
        int has_jit = 0;
        pcre_config(PCRE_CONFIG_JIT, &has_jit);
        if (has_jit) {
            study_opts = study_opts | PCRE_STUDY_JIT_COMPILE;
        }
#endif
        re_extra = pcre_study(re, study_opts, &pcre_err);
        if (re_extra == NULL) {
            log_err("pcre_study failed. Error: %s", pcre_err);
            exit(1);
        }
    }

    search_dir(re, re_extra, path, 0);

    if (opts.stats) {
        gettimeofday(&time_end, NULL);
        time_diff = ((long)time_end.tv_sec * 1000000 + time_end.tv_usec) - ((long)time_start.tv_sec * 1000000 + time_start.tv_usec);
        time_diff = time_diff / 1000000;

        printf("%ld files\n%ld bytes\n%f seconds\n", total_file_count, total_byte_count, time_diff);
    }

    pcre_free(re);
    pcre_free(re_extra); /* TODO I'm pretty sure I should call pcre_free_study() here */
    free(query);
    free(path);
    cleanup_ignore_patterns();

    return(0);
}
Ejemplo n.º 16
0
Archivo: main.c Proyecto: TidyHuang/DLP
int main(int argc, char **argv) {
    char **base_paths = NULL;
    char **paths = NULL;
    int i;
    int pcre_opts = PCRE_MULTILINE;
    int study_opts = 0;
    double time_diff;
    worker_t *workers = NULL;
    int workers_len;
    int num_cores;

#ifdef KJK_BUILD
    extern void setup_crash_handler(); /* in kjk_crash_handler.cpp */
    setup_crash_handler();
#endif

    set_log_level(LOG_LEVEL_WARN);

    work_queue = NULL;
    work_queue_tail = NULL;
    memset(&stats, 0, sizeof(stats));
    root_ignores = init_ignore(NULL, "", 0);
    out_fd = stdout;
#ifdef USE_PCRE_JIT
    int has_jit = 0;
    pcre_config(PCRE_CONFIG_JIT, &has_jit);
    if (has_jit) {
        study_opts |= PCRE_STUDY_JIT_COMPILE;
    }
#endif

    gettimeofday(&(stats.time_start), NULL);

    parse_options(argc, argv, &base_paths, &paths);
    log_debug("PCRE Version: %s", pcre_version());

#ifdef _WIN32
    {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        num_cores = si.dwNumberOfProcessors;
    }
#else
    num_cores = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif

    workers_len = num_cores;
    if (opts.literal) {
        workers_len--;
    }
    if (opts.workers) {
        workers_len = opts.workers;
    }
    if (workers_len < 1) {
        workers_len = 1;
    }

    log_debug("Using %i workers", workers_len);
    done_adding_files = FALSE;
    workers = (worker_t *) ag_calloc(workers_len, sizeof(worker_t));
	if (pthread_cond_init(&files_ready, NULL)) {
        die("pthread_cond_init failed!");
    }
    if (pthread_mutex_init(&print_mtx, NULL)) {
        die("pthread_mutex_init failed!");
    }
    if (pthread_mutex_init(&stats_mtx, NULL)) {
        die("pthread_mutex_init failed!");
    }
    if (pthread_mutex_init(&work_queue_mtx, NULL)) {
        die("pthread_mutex_init failed!");
    }

    if (opts.casing == CASE_SMART) {
        opts.casing = is_lowercase(opts.query) ? CASE_INSENSITIVE : CASE_SENSITIVE;
    }

    if (opts.literal) {
        if (opts.casing == CASE_INSENSITIVE) {
            /* Search routine needs the query to be lowercase */
            char *c = opts.query;
            for (; *c != '\0'; ++c) {
                *c = (char)tolower(*c);
            }
        }
        generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE);
        find_skip_lookup = NULL;
        generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE);
        if (opts.word_regexp) {
            init_wordchar_table();
            opts.literal_starts_wordchar = is_wordchar(opts.query[0]);
            opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]);
        }
    } else {
        if (opts.casing == CASE_INSENSITIVE) {
            pcre_opts |= PCRE_CASELESS;
        }
        if (opts.word_regexp) {
            char *word_regexp_query;
            ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query);
            free(opts.query);
            opts.query = word_regexp_query;
            opts.query_len = strlen(opts.query);
        }
        compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts);
    }

    if (opts.search_stream) {
       // search_stream(stdin, "");
    } else {
        for (i = 0; i < workers_len; i++) {
            workers[i].id = i;
            int rv = pthread_create(&(workers[i].thread), NULL, &search_file_worker, &(workers[i].id));
            if (rv != 0) {
                die("error in pthread_create(): %s", strerror(rv));
            }
#if defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(USE_CPU_SET)
            if (opts.use_thread_affinity) {
                cpu_set_t cpu_set;
                CPU_ZERO(&cpu_set);
                CPU_SET(i % num_cores, &cpu_set);
                rv = pthread_setaffinity_np(workers[i].thread, sizeof(cpu_set), &cpu_set);
                if (rv != 0) {
                    die("error in pthread_setaffinity_np(): %s", strerror(rv));
                }
                log_debug("Thread %i set to CPU %i", i, i);
            } else {
                log_debug("Thread affinity disabled.");
            }
#else
            log_debug("No CPU affinity support.");
#endif
        }
        for (i = 0; paths[i] != NULL; i++) {
            log_debug("searching path %s for %s", paths[i], opts.query);
            symhash = NULL;
            ignores *ig = init_ignore(root_ignores, "", 0);
            struct stat s;
            s.st_dev = 0;
#ifndef _WIN32
            /* The device is ignored if opts.one_dev is false, so it's fine
             * to leave it at the default 0
             */
            if (opts.one_dev && lstat(paths[i], &s) == -1) {
                log_err("Failed to get device information for path %s. Skipping...", paths[i]);
            }
#endif
            search_dir(ig, base_paths[i], paths[i], 0, s.st_dev);
            cleanup_ignore(ig);
        }
        pthread_mutex_lock(&work_queue_mtx);
        done_adding_files = TRUE;
        pthread_cond_broadcast(&files_ready);
        pthread_mutex_unlock(&work_queue_mtx);
        for (i = 0; i < workers_len; i++) {
            if (pthread_join(workers[i].thread, NULL)) {
                die("pthread_join failed!");
            }
        }
    }

    if (opts.stats) {
        gettimeofday(&(stats.time_end), NULL);
        time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
                    ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
        time_diff /= 1000000;

        printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff);
    }

    if (opts.pager) {
        pclose(out_fd);
    }
    cleanup_options();
    pthread_cond_destroy(&files_ready);
    pthread_mutex_destroy(&work_queue_mtx);
    pthread_mutex_destroy(&stats_mtx);
    pthread_mutex_destroy(&print_mtx);
    cleanup_ignore(root_ignores);
    free(workers);
    for (i = 0; paths[i] != NULL; i++) {
        free(paths[i]);
        free(base_paths[i]);
    }
    free(base_paths);
    free(paths);
    if (find_skip_lookup) {
        free(find_skip_lookup);
    }
    return !opts.match_found;
}
Ejemplo n.º 17
0
int main(int argc, char **argv) {
    char **base_paths = NULL;
    char **paths = NULL;
    int i;
    int pcre_opts = PCRE_MULTILINE;
    int study_opts = 0;
    double time_diff;
    pthread_t *workers = NULL;
    int workers_len;

    set_log_level(LOG_LEVEL_WARN);

    work_queue = NULL;
    work_queue_tail = NULL;
    memset(&stats, 0, sizeof(stats));
    root_ignores = init_ignore(NULL, "", 0);
    out_fd = stdout;
#ifdef USE_PCRE_JIT
    int has_jit = 0;
    pcre_config(PCRE_CONFIG_JIT, &has_jit);
    if (has_jit) {
        study_opts |= PCRE_STUDY_JIT_COMPILE;
    }
#endif

    gettimeofday(&(stats.time_start), NULL);

    parse_options(argc, argv, &base_paths, &paths);
    log_debug("PCRE Version: %s", pcre_version());

#ifdef _WIN32
    {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        workers_len = si.dwNumberOfProcessors;
    }
#else
    workers_len = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif
    if (opts.literal) {
        workers_len--;
    }
    if (opts.workers) {
        workers_len = opts.workers;
    }
    if (workers_len < 1) {
        workers_len = 1;
    }

    log_debug("Using %i workers", workers_len);
    done_adding_files = FALSE;
    workers = ag_calloc(workers_len, sizeof(pthread_t));
    if (pthread_cond_init(&files_ready, NULL)) {
        die("pthread_cond_init failed!");
    }
    if (pthread_mutex_init(&print_mtx, NULL)) {
        die("pthread_mutex_init failed!");
    }
    if (pthread_mutex_init(&stats_mtx, NULL)) {
        die("pthread_mutex_init failed!");
    }
    if (pthread_mutex_init(&work_queue_mtx, NULL)) {
        die("pthread_mutex_init failed!");
    }

    if (opts.casing == CASE_SMART) {
        opts.casing = is_lowercase(opts.query) ? CASE_INSENSITIVE : CASE_SENSITIVE;
    }

    if (opts.literal) {
        if (opts.casing == CASE_INSENSITIVE) {
            /* Search routine needs the query to be lowercase */
            char *c = opts.query;
            for (; *c != '\0'; ++c) {
                *c = (char)tolower(*c);
            }
        }
        generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE);
        find_skip_lookup = NULL;
        generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE);
        if (opts.word_regexp) {
            init_wordchar_table();
            opts.literal_starts_wordchar = is_wordchar(opts.query[0]);
            opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]);
        }
    } else {
        if (opts.casing == CASE_INSENSITIVE) {
            pcre_opts |= PCRE_CASELESS;
        }
        if (opts.word_regexp) {
            char *word_regexp_query;
            ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query);
            free(opts.query);
            opts.query = word_regexp_query;
            opts.query_len = strlen(opts.query);
        }
        compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts);
    }

    if (opts.search_stream) {
        search_stream(stdin, "");
    } else {
        for (i = 0; i < workers_len; i++) {
            int rv = pthread_create(&(workers[i]), NULL, &search_file_worker, &i);
            if (rv != 0) {
                die("error in pthread_create(): %s", strerror(rv));
            }
        }
        for (i = 0; paths[i] != NULL; i++) {
            log_debug("searching path %s for %s", paths[i], opts.query);
            symhash = NULL;
            ignores *ig = init_ignore(root_ignores, "", 0);
            search_dir(ig, base_paths[i], paths[i], 0);
            cleanup_ignore(ig);
        }
        pthread_mutex_lock(&work_queue_mtx);
        done_adding_files = TRUE;
        pthread_cond_broadcast(&files_ready);
        pthread_mutex_unlock(&work_queue_mtx);
        for (i = 0; i < workers_len; i++) {
            if (pthread_join(workers[i], NULL)) {
                die("pthread_join failed!");
            }
        }
    }

    if (opts.stats) {
        gettimeofday(&(stats.time_end), NULL);
        time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
                    ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
        time_diff /= 1000000;

        printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff);
    }

    if (opts.pager) {
        pclose(out_fd);
    }
    cleanup_options();
    pthread_cond_destroy(&files_ready);
    pthread_mutex_destroy(&work_queue_mtx);
    pthread_mutex_destroy(&stats_mtx);
    pthread_mutex_destroy(&print_mtx);
    cleanup_ignore(root_ignores);
    free(workers);
    for (i = 0; paths[i] != NULL; i++) {
        free(paths[i]);
        free(base_paths[i]);
    }
    free(base_paths);
    free(paths);
    if (find_skip_lookup) {
        free(find_skip_lookup);
    }
    return !opts.match_found;
}
static void
thunar_sbr_replace_renamer_init (ThunarSbrReplaceRenamer *replace_renamer)
{
  AtkRelationSet *relations;
  AtkRelation    *relation;
  AtkObject      *object;
  GtkWidget      *table;
  GtkWidget      *label;
  GtkWidget      *entry;
  GtkWidget      *button;

#ifdef HAVE_PCRE
  /* check if PCRE supports UTF-8 */
  if (pcre_config (PCRE_CONFIG_UTF8, &replace_renamer->regexp_supported) != 0)
    replace_renamer->regexp_supported = FALSE;
#endif

  /* allocate the shared tooltips */
  replace_renamer->tooltips = gtk_tooltips_new ();
  exo_gtk_object_ref_sink (GTK_OBJECT (replace_renamer->tooltips));

  table = gtk_table_new (2, 3, FALSE);
  gtk_table_set_row_spacings (GTK_TABLE (table), 6);
  gtk_table_set_col_spacings (GTK_TABLE (table), 12);
  gtk_box_pack_start (GTK_BOX (replace_renamer), table, FALSE, FALSE, 0);
  gtk_widget_show (table);

  label = gtk_label_new_with_mnemonic (_("_Search For:"));
  gtk_misc_set_alignment (GTK_MISC (label), 0.0f, 0.5f);
  gtk_table_attach (GTK_TABLE (table), label, 0, 1, 0, 1, GTK_FILL, 0, 0, 0);
  gtk_widget_show (label);

  replace_renamer->pattern_entry = gtk_entry_new ();
  gtk_entry_set_activates_default (GTK_ENTRY (replace_renamer->pattern_entry), TRUE);
  exo_mutual_binding_new (G_OBJECT (replace_renamer->pattern_entry), "text", G_OBJECT (replace_renamer), "pattern");
  gtk_tooltips_set_tip (replace_renamer->tooltips, replace_renamer->pattern_entry, _("Enter the text to search for in the file names."), NULL);
  gtk_table_attach (GTK_TABLE (table), replace_renamer->pattern_entry, 1, 2, 0, 1, GTK_EXPAND | GTK_FILL, 0, 0, 0);
  gtk_label_set_mnemonic_widget (GTK_LABEL (label), replace_renamer->pattern_entry);
  gtk_widget_show (replace_renamer->pattern_entry);

  /* set Atk label relation for the entry */
  object = gtk_widget_get_accessible (replace_renamer->pattern_entry);
  relations = atk_object_ref_relation_set (gtk_widget_get_accessible (label));
  relation = atk_relation_new (&object, 1, ATK_RELATION_LABEL_FOR);
  atk_relation_set_add (relations, relation);
  g_object_unref (G_OBJECT (relation));

  button = gtk_check_button_new_with_mnemonic (_("Regular _Expression"));
  exo_mutual_binding_new (G_OBJECT (button), "active", G_OBJECT (replace_renamer), "regexp");
  gtk_tooltips_set_tip (replace_renamer->tooltips, button, _("If you enable this option, the pattern will be treated as a regular expression and "
                                                             "matched using the Perl-compatible regular expressions (PCRE). Check the documentation "
                                                             "for details about the regular expression syntax."), NULL);
  gtk_table_attach (GTK_TABLE (table), button, 2, 3, 0, 1, GTK_FILL, 0, 0, 0);
  gtk_widget_set_sensitive (button, replace_renamer->regexp_supported);
  gtk_widget_show (button);

  label = gtk_label_new_with_mnemonic (_("Replace _With:"));
  gtk_misc_set_alignment (GTK_MISC (label), 0.0f, 0.5f);
  gtk_table_attach (GTK_TABLE (table), label, 0, 1, 1, 2, GTK_FILL, 0, 0, 0);
  gtk_widget_show (label);

  entry = gtk_entry_new ();
  gtk_entry_set_activates_default (GTK_ENTRY (entry), TRUE);
  exo_mutual_binding_new (G_OBJECT (entry), "text", G_OBJECT (replace_renamer), "replacement");
  gtk_tooltips_set_tip (replace_renamer->tooltips, entry, _("Enter the text that should be used as replacement for the pattern above."), NULL);
  gtk_table_attach (GTK_TABLE (table), entry, 1, 2, 1, 2, GTK_EXPAND | GTK_FILL, 0, 0, 0);
  gtk_label_set_mnemonic_widget (GTK_LABEL (label), entry);
  gtk_widget_show (entry);

  /* set Atk label relation for the entry */
  object = gtk_widget_get_accessible (entry);
  relations = atk_object_ref_relation_set (gtk_widget_get_accessible (label));
  relation = atk_relation_new (&object, 1, ATK_RELATION_LABEL_FOR);
  atk_relation_set_add (relations, relation);
  g_object_unref (G_OBJECT (relation));

  button = gtk_check_button_new_with_mnemonic (_("C_ase Sensitive Search"));
  exo_mutual_binding_new (G_OBJECT (button), "active", G_OBJECT (replace_renamer), "case-sensitive");
  gtk_tooltips_set_tip (replace_renamer->tooltips, button, _("If you enable this option, the pattern will be searched in a case-sensitive manner. "
                                                             "The default is to use a case-insensitive search."), NULL);
  gtk_table_attach (GTK_TABLE (table), button, 2, 3, 1, 2, GTK_FILL, 0, 0, 0);
  gtk_widget_show (button);
}
Ejemplo n.º 19
0
/* Generic stub for getting integer results from pcre_config */
static int pcre_config_int(int what)
{
  int ret;
  pcre_config(what, (void *) &ret);
  return ret;
}
Ejemplo n.º 20
0
Archivo: mire.c Proyecto: avokhmin/RPM5
int mireSetGOptions(const char * newline, int caseless, int multiline, int utf8)
{
    int rc = 0;

    if (caseless) {
#if defined(PCRE_CASELESS)
	_mirePCREoptions |= PCRE_CASELESS;
#endif
	_mireREGEXoptions |= REG_ICASE;
#if defined(FNM_CASEFOLD)
	_mireGLOBoptions |= FNM_CASEFOLD;
#endif
    } else {
#if defined(PCRE_CASELESS)
	_mirePCREoptions &= ~PCRE_CASELESS;
#endif
	_mireREGEXoptions &= ~REG_ICASE;
#if defined(FNM_CASEFOLD)
	_mireGLOBoptions &= ~FNM_CASEFOLD;
#endif
    }

    if (multiline) {
#if defined(PCRE_MULTILINE)
	_mirePCREoptions |= PCRE_MULTILINE|PCRE_FIRSTLINE;
#endif
    } else {
#if defined(PCRE_MULTILINE)
	_mirePCREoptions &= ~(PCRE_MULTILINE|PCRE_FIRSTLINE);
#endif
    }

    if (utf8) {
#if defined(PCRE_UTF8)
	_mirePCREoptions |= PCRE_UTF8;
#endif
    } else {
#if defined(PCRE_UTF8)
	_mirePCREoptions &= ~PCRE_UTF8;
#endif
    }

    /*
     * Set the default line ending value from the default in the PCRE library;
     * "lf", "cr", "crlf", and "any" are supported. Anything else is treated
     * as "lf".
     */
    if (newline == NULL) {
	int val = 0;
#if defined(PCRE_CONFIG_NEWLINE)
/*@-modunconnomods@*/
	(void)pcre_config(PCRE_CONFIG_NEWLINE, &val);
/*@=modunconnomods@*/
#endif
	switch (val) {
	default:	newline = "lf";		break;
	case '\r':	newline = "cr";		break;
/*@-shiftimplementation@*/
	case ('\r' << 8) | '\n': newline = "crlf"; break;
/*@=shiftimplementation@*/
	case -1:	newline = "any";	break;
	case -2:	newline = "anycrlf";	break;
	}
    }

    /* Interpret the newline type; the default settings are Unix-like. */
    if (!strcasecmp(newline, "cr")) {
#if defined(PCRE_NEWLINE_CR)
	_mirePCREoptions |= PCRE_NEWLINE_CR;
#endif
	_mireEL = EL_CR;
    } else if (!strcasecmp(newline, "lf")) {
#if defined(PCRE_NEWLINE_LF)
	_mirePCREoptions |= PCRE_NEWLINE_LF;
#endif
	_mireEL = EL_LF;
    } else if (!strcasecmp(newline, "crlf")) {
#if defined(PCRE_NEWLINE_CRLF)
	_mirePCREoptions |= PCRE_NEWLINE_CRLF;
#endif
	_mireEL = EL_CRLF;
    } else if (!strcasecmp(newline, "any")) {
#if defined(PCRE_NEWLINE_ANY)
	_mirePCREoptions |= PCRE_NEWLINE_ANY;
#endif
	_mireEL = EL_ANY;
    } else if (!strcasecmp(newline, "anycrlf")) {
#if defined(PCRE_NEWLINE_ANYCRLF)
	_mirePCREoptions |= PCRE_NEWLINE_ANYCRLF;
#endif
	_mireEL = EL_ANYCRLF;
    } else {
	rc = -1;
    }

    return rc;
}
Ejemplo n.º 21
0
word loader(CrocThread* t)
{
	loadPCRESharedLib(t);

	// Check that we have an appropriate libpcre, first..
	{
		auto vers = atoda(pcre_version());
		vers = vers.slice(0, strLocateChar(vers, ' '));
		auto dotPos = strLocateChar(vers, '.');

		auto major = strtol(cast(const char*)vers.ptr, nullptr, 10);
		auto minor = strtol(cast(const char*)vers.ptr + dotPos + 1, nullptr, 10);

		if(major < 7 || (major == 7 && minor < 4))
		{
			croc_eh_throwStd(t, "RuntimeError", "Your PCRE library is only version %.*s. You need 7.4 or higher.",
				cast(int)vers.length, vers.ptr);
		}

		int haveUtf8;
		pcre_config(PCRE_CONFIG_UTF8, &haveUtf8);

		if(!haveUtf8)
			croc_eh_throwStd(t, "RuntimeError", "Your PCRE library was not built with UTF-8 support.");
	}

	croc_class_new(t, "Regex", 0);
		croc_pushNull(t);   croc_class_addHField(t, -2, _Ptrs);      // memblock
		croc_pushNull(t);   croc_class_addHField(t, -2, _Names);     // table
		croc_pushNull(t);   croc_class_addHField(t, -2, _GroupIdx);  // memblock
		croc_pushNull(t);   croc_class_addHField(t, -2, _Subject);   // string
		croc_pushInt(t, 0); croc_class_addHField(t, -2, _NumGroups); // int
		croc_pushInt(t, 0); croc_class_addHField(t, -2, _NextStart); // int

		registerMethods(t, Regex_methodFuncs);
		registerMethodUV(t, Regex_opApplyFunc);

		croc_field(t, -1, "match");
		croc_class_addMethod(t, -2, "opIndex");
	croc_newGlobal(t, "Regex");

#ifdef CROC_BUILTIN_DOCS
	CrocDoc doc;
	croc_ex_doc_init(t, &doc, __FILE__);
	croc_dup(t, 0);
	croc_ex_doc_push(&doc, moduleDocs);
		croc_field(t, -1, "Regex");
			croc_ex_doc_push(&doc, RegexDocs);
			docFields(&doc, Regex_methodFuncs);
			docFieldUV(&doc, Regex_opApplyFunc);

			docField(&doc, {Regex_opIndex_info, nullptr});
			croc_ex_doc_pop(&doc, -1);
		croc_popTop(t);
	croc_ex_doc_pop(&doc, -1);
	croc_ex_doc_finish(&doc);
	croc_popTop(t);
#endif

	return 0;
}
Ejemplo n.º 22
0
int main(int argc, char **argv)
{
pcre *re;
const char *error;
char *pattern;
char *subject;
unsigned char *name_table;
unsigned int option_bits;
int erroffset;
int find_all;
int crlf_is_newline;
int namecount;
int name_entry_size;
int ovector[OVECCOUNT];
int subject_length;
int rc, i;
int utf8;


/**************************************************************************
* First, sort out the command line. There is only one possible option at  *
* the moment, "-g" to request repeated matching to find all occurrences,  *
* like Perl's /g option. We set the variable find_all to a non-zero value *
* if the -g option is present. Apart from that, there must be exactly two *
* arguments.                                                              *
**************************************************************************/

find_all = 0;
for (i = 1; i < argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
    else break;
  }

/* After the options, we require exactly two arguments, which are the pattern,
and the subject string. */

if (argc - i != 2)
  {
  printf("Two arguments required: a regex and a subject string\n");
  return 1;
  }

pattern = argv[i];
subject = argv[i+1];
subject_length = (int)strlen(subject);


/*************************************************************************
* Now we are going to compile the regular expression pattern, and handle *
* and errors that are detected.                                          *
*************************************************************************/

re = pcre_compile(
  pattern,              /* the pattern */
  0,                    /* default options */
  &error,               /* for error message */
  &erroffset,           /* for error offset */
  NULL);                /* use default character tables */

/* Compilation failed: print the error message and exit */

if (re == NULL)
  {
  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
  return 1;
  }


/*************************************************************************
* If the compilation succeeded, we call PCRE again, in order to do a     *
* pattern match against the subject string. This does just ONE match. If *
* further matching is needed, it will be done below.                     *
*************************************************************************/

rc = pcre_exec(
  re,                   /* the compiled pattern */
  NULL,                 /* no extra data - we didn't study the pattern */
  subject,              /* the subject string */
  subject_length,       /* the length of the subject */
  0,                    /* start at offset 0 in the subject */
  0,                    /* default options */
  ovector,              /* output vector for substring information */
  OVECCOUNT);           /* number of elements in the output vector */

/* Matching failed: handle error cases */

if (rc < 0)
  {
  switch(rc)
    {
    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
    /*
    Handle other special cases if you like
    */
    default: printf("Matching error %d\n", rc); break;
    }
  pcre_free(re);     /* Release memory used for the compiled pattern */
  return 1;
  }

/* Match succeded */

printf("\nMatch succeeded at offset %d\n", ovector[0]);


/*************************************************************************
* We have found the first match within the subject string. If the output *
* vector wasn't big enough, say so. Then output any substrings that were *
* captured.                                                              *
*************************************************************************/

/* The output vector wasn't big enough */

if (rc == 0)
  {
  rc = OVECCOUNT/3;
  printf("ovector only has room for %d captured substrings\n", rc - 1);
  }

/* Show substrings stored in the output vector by number. Obviously, in a real
application you might want to do things other than print them. */

for (i = 0; i < rc; i++)
  {
  char *substring_start = subject + ovector[2*i];
  int substring_length = ovector[2*i+1] - ovector[2*i];
  printf("%2d: %.*s\n", i, substring_length, substring_start);
  }


/**************************************************************************
* That concludes the basic part of this demonstration program. We have    *
* compiled a pattern, and performed a single match. The code that follows *
* shows first how to access named substrings, and then how to code for    *
* repeated matches on the same subject.                                   *
**************************************************************************/

/* See if there are any named substrings, and if so, show them by name. First
we have to extract the count of named parentheses from the pattern. */

(void)pcre_fullinfo(
  re,                   /* the compiled pattern */
  NULL,                 /* no extra data - we didn't study the pattern */
  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
  &namecount);          /* where to put the answer */

if (namecount <= 0) printf("No named substrings\n"); else
  {
  unsigned char *tabptr;
  printf("Named substrings\n");

  /* Before we can access the substrings, we must extract the table for
  translating names to numbers, and the size of each entry in the table. */

  (void)pcre_fullinfo(
    re,                       /* the compiled pattern */
    NULL,                     /* no extra data - we didn't study the pattern */
    PCRE_INFO_NAMETABLE,      /* address of the table */
    &name_table);             /* where to put the answer */

  (void)pcre_fullinfo(
    re,                       /* the compiled pattern */
    NULL,                     /* no extra data - we didn't study the pattern */
    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
    &name_entry_size);        /* where to put the answer */

  /* Now we can scan the table and, for each entry, print the number, the name,
  and the substring itself. */

  tabptr = name_table;
  for (i = 0; i < namecount; i++)
    {
    int n = (tabptr[0] << 8) | tabptr[1];
    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
    tabptr += name_entry_size;
    }
  }


/*************************************************************************
* If the "-g" option was given on the command line, we want to continue  *
* to search for additional matches in the subject string, in a similar   *
* way to the /g option in Perl. This turns out to be trickier than you   *
* might think because of the possibility of matching an empty string.    *
* What happens is as follows:                                            *
*                                                                        *
* If the previous match was NOT for an empty string, we can just start   *
* the next match at the end of the previous one.                         *
*                                                                        *
* If the previous match WAS for an empty string, we can't do that, as it *
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
* The first of these tells PCRE that an empty string at the start of the *
* subject is not a valid match; other possibilities must be tried. The   *
* second flag restricts PCRE to one match attempt at the initial string  *
* position. If this match succeeds, an alternative to the empty string   *
* match has been found, and we can print it and proceed round the loop,  *
* advancing by the length of whatever was found. If this match does not  *
* succeed, we still stay in the loop, advancing by just one character.   *
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
* more than one byte.                                                    *
*                                                                        *
* However, there is a complication concerned with newlines. When the     *
* newline convention is such that CRLF is a valid newline, we want must  *
* advance by two characters rather than one. The newline convention can  *
* be set in the regex by (*CR), etc.; if not, we must find the default.  *
*************************************************************************/

if (!find_all)     /* Check for -g */
  {
  pcre_free(re);   /* Release the memory used for the compiled pattern */
  return 0;        /* Finish unless -g was given */
  }

/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
sequence. First, find the options with which the regex was compiled; extract
the UTF-8 state, and mask off all but the newline options. */

(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
utf8 = option_bits & PCRE_UTF8;
option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
               PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;

/* If no newline options were set, find the default newline convention from the
build configuration. */

if (option_bits == 0)
  {
  int d;
  (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
  /* Note that these values are always the ASCII ones, even in
  EBCDIC environments. CR = 13, NL = 10. */
  option_bits = (d == 13)? PCRE_NEWLINE_CR :
          (d == 10)? PCRE_NEWLINE_LF :
          (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
          (d == -2)? PCRE_NEWLINE_ANYCRLF :
          (d == -1)? PCRE_NEWLINE_ANY : 0;
  }

/* See if CRLF is a valid newline sequence. */

crlf_is_newline =
     option_bits == PCRE_NEWLINE_ANY ||
     option_bits == PCRE_NEWLINE_CRLF ||
     option_bits == PCRE_NEWLINE_ANYCRLF;

/* Loop for second and subsequent matches */

for (;;)
  {
  int options = 0;                 /* Normally no options */
  int start_offset = ovector[1];   /* Start at end of previous match */

  /* If the previous match was for an empty string, we are finished if we are
  at the end of the subject. Otherwise, arrange to run another match at the
  same point to see if a non-empty match can be found. */

  if (ovector[0] == ovector[1])
    {
    if (ovector[0] == subject_length) break;
    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
    }

  /* Run the next matching operation */

  rc = pcre_exec(
    re,                   /* the compiled pattern */
    NULL,                 /* no extra data - we didn't study the pattern */
    subject,              /* the subject string */
    subject_length,       /* the length of the subject */
    start_offset,         /* starting offset in the subject */
    options,              /* options */
    ovector,              /* output vector for substring information */
    OVECCOUNT);           /* number of elements in the output vector */

  /* This time, a result of NOMATCH isn't an error. If the value in "options"
  is zero, it just means we have found all possible matches, so the loop ends.
  Otherwise, it means we have failed to find a non-empty-string match at a
  point where there was a previous empty-string match. In this case, we do what
  Perl does: advance the matching position by one character, and continue. We
  do this by setting the "end of previous match" offset, because that is picked
  up at the top of the loop as the point at which to start again.

  There are two complications: (a) When CRLF is a valid newline sequence, and
  the current position is just before it, advance by an extra byte. (b)
  Otherwise we must ensure that we skip an entire UTF-8 character if we are in
  UTF-8 mode. */

  if (rc == PCRE_ERROR_NOMATCH)
    {
    if (options == 0) break;                    /* All matches found */
    ovector[1] = start_offset + 1;              /* Advance one byte */
    if (crlf_is_newline &&                      /* If CRLF is newline & */
        start_offset < subject_length - 1 &&    /* we are at CRLF, */
        subject[start_offset] == '\r' &&
        subject[start_offset + 1] == '\n')
      ovector[1] += 1;                          /* Advance by one more. */
    else if (utf8)                              /* Otherwise, ensure we */
      {                                         /* advance a whole UTF-8 */
      while (ovector[1] < subject_length)       /* character. */
        {
        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
        ovector[1] += 1;
        }
      }
    continue;    /* Go round the loop again */
    }

  /* Other matching errors are not recoverable. */

  if (rc < 0)
    {
    printf("Matching error %d\n", rc);
    pcre_free(re);    /* Release memory used for the compiled pattern */
    return 1;
    }

  /* Match succeded */

  printf("\nMatch succeeded again at offset %d\n", ovector[0]);

  /* The match succeeded, but the output vector wasn't big enough. */

  if (rc == 0)
    {
    rc = OVECCOUNT/3;
    printf("ovector only has room for %d captured substrings\n", rc - 1);
    }

  /* As before, show substrings stored in the output vector by number, and then
  also any named substrings. */

  for (i = 0; i < rc; i++)
    {
    char *substring_start = subject + ovector[2*i];
    int substring_length = ovector[2*i+1] - ovector[2*i];
    printf("%2d: %.*s\n", i, substring_length, substring_start);
    }

  if (namecount <= 0) printf("No named substrings\n"); else
    {
    unsigned char *tabptr = name_table;
    printf("Named substrings\n");
    for (i = 0; i < namecount; i++)
      {
      int n = (tabptr[0] << 8) | tabptr[1];
      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
      tabptr += name_entry_size;
      }
    }
  }      /* End of loop to find second and subsequent matches */

printf("\n");
pcre_free(re);       /* Release memory used for the compiled pattern */
return 0;
}
Ejemplo n.º 23
0
CAMLprim value pcre_exec_stub0(
    intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
    value v_ovec, value v_maybe_cof, value v_workspace)
{
  int ret;
  int is_dfa = v_workspace != (value) NULL;
  long
    pos = v_pos,
    len = caml_string_length(v_subj),
    subj_start = v_subj_start;
  long ovec_len = Wosize_val(v_ovec);

  if (pos > len || pos < subj_start)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal position");

  if (subj_start > len || subj_start < 0)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start");

  pos -= subj_start;
  len -= subj_start;

  {
    const pcre *code = get_rex(v_rex);  /* Compiled pattern */
    const pcre_extra *extra = get_extra(v_rex);  /* Extra info */
    const char *ocaml_subj =
      String_val(v_subj) + subj_start;  /* Subject string */
    const int opt = v_opt;  /* Runtime options */

    /* Special case when no callout functions specified */
    if (v_maybe_cof == None) {
      int *ovec = (int *) &Field(v_ovec, 0);

      /* Performs the match */
      if (is_dfa)
        ret =
          pcre_dfa_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len,
              (int *) &Field(v_workspace, 0), Wosize_val(v_workspace));
      else
        ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len);

      if (ret < 0) handle_exec_error("pcre_exec_stub", ret);
      else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
    }

    /* There are callout functions */
    else {
      value v_cof = Field(v_maybe_cof, 0);
      value v_substrings;
      char *subj = caml_stat_alloc(sizeof(char) * len);
      int *ovec = caml_stat_alloc(sizeof(int) * ovec_len);
      int workspace_len;
      int *workspace;
      struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
      struct pcre_extra new_extra =
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
# ifdef PCRE_EXTRA_MARK
#  ifdef PCRE_EXTRA_EXECUTABLE_JIT
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL };
#  else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL };
#  endif
# else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
# endif
#else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
#endif

      cod.subj_start = subj_start;
      memcpy(subj, ocaml_subj, len);

      Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
        Begin_roots1(v_subj);
          v_substrings = caml_alloc_small(2, 0);
        End_roots();

        Field(v_substrings, 0) = v_subj;
        Field(v_substrings, 1) = v_ovec;

        cod.v_substrings_p = &v_substrings;
        cod.v_cof_p = &v_cof;
        new_extra.callout_data = &cod;

        if (extra != NULL) {
          new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
          new_extra.study_data = extra->study_data;
          new_extra.match_limit = extra->match_limit;
          new_extra.tables = extra->tables;
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
          new_extra.match_limit_recursion = extra->match_limit_recursion;
#endif
        }

        if (is_dfa) {
          workspace_len = Wosize_val(v_workspace);
          workspace = caml_stat_alloc(sizeof(int) * workspace_len);
          ret =
            pcre_dfa_exec(code, extra, subj, len, pos, opt, ovec, ovec_len,
                (int *) &Field(v_workspace, 0), workspace_len);
        } else
          ret =
            pcre_exec(code, &new_extra, subj, len, pos, opt, ovec, ovec_len);

        caml_stat_free(subj);
      End_roots();

      if (ret < 0) {
        if (is_dfa) caml_stat_free(workspace);
        caml_stat_free(ovec);
        if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn);
        else handle_exec_error("pcre_exec_stub(callout)", ret);
      } else {
        handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
        if (is_dfa) {
          caml_int_ptr ocaml_workspace_dst =
            (caml_int_ptr) &Field(v_workspace, 0);
          const int *workspace_src = workspace;
          const int *workspace_src_stop = workspace + workspace_len;
          while (workspace_src != workspace_src_stop) {
            *ocaml_workspace_dst = *workspace_src;
            ocaml_workspace_dst++;
            workspace_src++;
          }
          caml_stat_free(workspace);
        }
        caml_stat_free(ovec);
      }
    }
  }

  return Val_unit;
}

CAMLprim value pcre_exec_stub(
    intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
    value v_ovec, value v_maybe_cof)
{
  return pcre_exec_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj,
                         v_ovec, v_maybe_cof, (value) NULL);
}

/* Byte-code hook for pcre_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
{
  return
    pcre_exec_stub0(
        Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
        argv[4], argv[5], argv[6], (value) NULL);
}

/* Byte-code hook for pcre_dfa_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_dfa_exec_stub_bc(value *argv, int __unused argn)
{
  return
    pcre_exec_stub0(
        Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
        argv[4], argv[5], argv[6], argv[7]);
}

static struct custom_operations tables_ops = {
  "pcre_ocaml_tables",
  pcre_dealloc_tables,
  custom_compare_default,
  custom_hash_default,
  custom_serialize_default,
  custom_deserialize_default,
  custom_compare_ext_default
};

/* Generates a new set of chartables for the current locale (see man
   page of PCRE */
CAMLprim value pcre_maketables_stub(value __unused v_unit)
{
  /* GC will do a full cycle every 1_000_000 table set allocations (one
     table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed
     table sets) */
  const value v_tables =
    caml_alloc_custom(
        &tables_ops, sizeof(struct pcre_ocaml_tables), 1, 1000000);
  set_tables(v_tables, pcre_maketables());
  return v_tables;
}

/* Wraps around the isspace-function */
CAMLprim value pcre_isspace_stub(value v_c)
{
  return Val_bool(isspace(Int_val(v_c)));
}


/* Returns number of substring associated with a name */

CAMLprim intnat pcre_get_stringnumber_stub(value v_rex, value v_name)
{
  const int ret = pcre_get_stringnumber(get_rex(v_rex), String_val(v_name));
  if (ret == PCRE_ERROR_NOSUBSTRING)
    caml_invalid_argument("Named string not found");

  return ret;
}

CAMLprim value pcre_get_stringnumber_stub_bc(value v_rex, value v_name)
{
  return Val_int(pcre_get_stringnumber_stub(v_rex, v_name));
}


/* Returns array of names of named substrings in a regexp */
CAMLprim value pcre_names_stub(value v_rex)
{
  CAMLparam0();
  CAMLlocal1(v_res);
  int name_count;
  int entry_size;
  const char *tbl_ptr;
  int i;

  int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
  if (ret != 0) raise_internal_error("pcre_names_stub: namecount");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
  if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
  if (ret != 0) raise_internal_error("pcre_names_stub: nametable");

  v_res = caml_alloc(name_count, 0);

  for (i = 0; i < name_count; ++i) {
    value v_name = caml_copy_string(tbl_ptr + 2);
    Store_field(v_res, i, v_name);
    tbl_ptr += entry_size;
  }

  CAMLreturn(v_res);
}

/* Generic stub for getting integer results from pcre_config */
static inline int pcre_config_int(int what)
{
  int ret;
  pcre_config(what, (void *) &ret);
  return ret;
}

/* Generic stub for getting long integer results from pcre_config */
static inline int pcre_config_long(int what)
{
  long ret;
  pcre_config(what, (void *) &ret);
  return ret;
}
Ejemplo n.º 24
0
static int regression_tests(void)
{
	struct regression_test_case *current = regression_test_cases;
	const char *error;
	const char *cpu_info;
	int i, err_offs;
	int is_successful, is_ascii_pattern, is_ascii_input;
	int total = 0;
	int successful = 0;
	int counter = 0;
#ifdef SUPPORT_PCRE8
	pcre *re8;
	pcre_extra *extra8;
	int ovector8_1[32];
	int ovector8_2[32];
	int return_value8_1, return_value8_2;
	int utf8 = 0, ucp8 = 0;
	int disabled_flags8 = 0;
#endif
#ifdef SUPPORT_PCRE16
	pcre16 *re16;
	pcre16_extra *extra16;
	int ovector16_1[32];
	int ovector16_2[32];
	int return_value16_1, return_value16_2;
	int utf16 = 0, ucp16 = 0;
	int disabled_flags16 = 0;
	int length16;
#endif

	/* This test compares the behaviour of interpreter and JIT. Although disabling
	utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
	still considered successful from pcre_jit_test point of view. */

#ifdef SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
#else
	pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
#endif

	printf("Running JIT regression tests\n");
	printf("  target CPU of SLJIT compiler: %s\n", cpu_info);

#ifdef SUPPORT_PCRE8
	pcre_config(PCRE_CONFIG_UTF8, &utf8);
	pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
	if (!utf8)
		disabled_flags8 |= PCRE_UTF8;
	if (!ucp8)
		disabled_flags8 |= PCRE_UCP;
	printf("  in  8 bit mode with utf8  %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
#endif
#ifdef SUPPORT_PCRE16
	pcre16_config(PCRE_CONFIG_UTF16, &utf16);
	pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
	if (!utf16)
		disabled_flags16 |= PCRE_UTF8;
	if (!ucp16)
		disabled_flags16 |= PCRE_UCP;
	printf("  in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
#endif

	while (current->pattern) {
		/* printf("\nPattern: %s :\n", current->pattern); */
		total++;
		if (current->start_offset & F_PROPERTY) {
			is_ascii_pattern = 0;
			is_ascii_input = 0;
		} else {
			is_ascii_pattern = check_ascii(current->pattern);
			is_ascii_input = check_ascii(current->input);
		}

		error = NULL;
#ifdef SUPPORT_PCRE8
		re8 = NULL;
		if (!(current->start_offset & F_NO8))
			re8 = pcre_compile(current->pattern,
				current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
				&error, &err_offs, tables(0));

		extra8 = NULL;
		if (re8) {
			error = NULL;
			extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
			if (!extra8) {
				printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
				pcre_free(re8);
				re8 = NULL;
			}
			if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
				printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
				pcre_free_study(extra8);
				pcre_free(re8);
				re8 = NULL;
			}
		} else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
			printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
#endif
#ifdef SUPPORT_PCRE16
		if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
			convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
		else
			copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);

		re16 = NULL;
		if (!(current->start_offset & F_NO16))
			re16 = pcre16_compile(regtest_buf,
				current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
				&error, &err_offs, tables(0));

		extra16 = NULL;
		if (re16) {
			error = NULL;
			extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
			if (!extra16) {
				printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
				pcre16_free(re16);
				re16 = NULL;
			}
			if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
				printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
				pcre16_free_study(extra16);
				pcre16_free(re16);
				re16 = NULL;
			}
		} else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
			printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
#endif

		counter++;
		if ((counter & 0x3) != 0) {
#ifdef SUPPORT_PCRE8
			setstack8(NULL);
#endif
#ifdef SUPPORT_PCRE16
			setstack16(NULL);
#endif
		}

#ifdef SUPPORT_PCRE8
		return_value8_1 = -1000;
		return_value8_2 = -1000;
		for (i = 0; i < 32; ++i)
			ovector8_1[i] = -2;
		for (i = 0; i < 32; ++i)
			ovector8_2[i] = -2;
		if (re8) {
			setstack8(extra8);
			return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
			return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
		}
#endif

#ifdef SUPPORT_PCRE16
		return_value16_1 = -1000;
		return_value16_2 = -1000;
		for (i = 0; i < 32; ++i)
			ovector16_1[i] = -2;
		for (i = 0; i < 32; ++i)
			ovector16_2[i] = -2;
		if (re16) {
			setstack16(extra16);
			if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
				length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
			else
				length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
			return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
			return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
		}
#endif

		/* If F_DIFF is set, just run the test, but do not compare the results.
		Segfaults can still be captured. */

		is_successful = 1;
		if (!(current->start_offset & F_DIFF)) {
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
			if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
				/* All results must be the same. */
				if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
					printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
						return_value8_1, return_value8_2, return_value16_1, return_value16_2,
						total, current->pattern, current->input);
					is_successful = 0;
				} else if (return_value8_1 >= 0) {
					return_value8_1 *= 2;
					/* Transform back the results. */
					if (current->flags & PCRE_UTF8) {
						for (i = 0; i < return_value8_1; ++i) {
							if (ovector16_1[i] >= 0)
								ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
							if (ovector16_2[i] >= 0)
								ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
						}
					}

					for (i = 0; i < return_value8_1; ++i)
						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
							printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
								i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
								total, current->pattern, current->input);
							is_successful = 0;
						}
				}
			} else {
#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
				/* Only the 8 bit and 16 bit results must be equal. */
#ifdef SUPPORT_PCRE8
				if (return_value8_1 != return_value8_2) {
					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
						return_value8_1, return_value8_2, total, current->pattern, current->input);
					is_successful = 0;
				} else if (return_value8_1 >= 0) {
					return_value8_1 *= 2;
					for (i = 0; i < return_value8_1; ++i)
						if (ovector8_1[i] != ovector8_2[i]) {
							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
								i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
							is_successful = 0;
						}
				}
#endif

#ifdef SUPPORT_PCRE16
				if (return_value16_1 != return_value16_2) {
					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
						return_value16_1, return_value16_2, total, current->pattern, current->input);
					is_successful = 0;
				} else if (return_value16_1 >= 0) {
					return_value16_1 *= 2;
					for (i = 0; i < return_value16_1; ++i)
						if (ovector16_1[i] != ovector16_2[i]) {
							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
								i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
							is_successful = 0;
						}
				}
#endif

#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
			}
#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
		}

		if (is_successful) {
#ifdef SUPPORT_PCRE8
			if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
				if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
						total, current->pattern, current->input);
					is_successful = 0;
				}

				if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
						total, current->pattern, current->input);
					is_successful = 0;
				}
			}
#endif
#ifdef SUPPORT_PCRE16
			if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
				if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
						total, current->pattern, current->input);
					is_successful = 0;
				}

				if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
						total, current->pattern, current->input);
					is_successful = 0;
				}
			}
#endif
		}

		if (is_successful)
			successful++;

#ifdef SUPPORT_PCRE8
		if (re8) {
			pcre_free_study(extra8);
			pcre_free(re8);
		}
#endif
#ifdef SUPPORT_PCRE16
		if (re16) {
			pcre16_free_study(extra16);
			pcre16_free(re16);
		}
#endif

		/* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
		printf(".");
		fflush(stdout);
		current++;
	}
	tables(1);
#ifdef SUPPORT_PCRE8
	setstack8(NULL);
#endif
#ifdef SUPPORT_PCRE16
	setstack16(NULL);
#endif

	if (total == successful) {
		printf("\nAll JIT regression tests are successfully passed.\n");
		return 0;
	} else {
		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
		return 1;
	}
}
Ejemplo n.º 25
0
pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
{
    /* ALL strings are managed as UTF-8 by default */
    int options = PCRE_UTF8;
    int size_offsets = 45;
    int size_offsets_max;
    int *offsets = NULL;
    int all_use_dfa = 0;
    BOOL LOOP_PCRE_TST = FALSE;

    /* These vectors store, end-to-end, a list of captured substring names. Assume
    that 1024 is plenty long enough for the few names we'll be testing. */

    char copynames[1024];
    char getnames[1024];

    char *copynamesptr = NULL;
    char *getnamesptr = NULL;

    int rc = 0;
    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
    if (rc != 1)
    {
        return UTF8_NOT_SUPPORTED;
    }

    /* bug 3891 */
    /* backslash characters are not interpreted for input */
    buffer = strsub(INPUT_LINE, "\\", "\\\\");

    size_offsets_max = size_offsets;
    offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
    if (offsets == NULL)
    {
        if (buffer)
        {
            FREE(buffer);
            buffer = NULL;
        }
        return NOT_ENOUGH_MEMORY_FOR_VECTOR;
    }
    /* Main loop */
    LOOP_PCRE_TST = FALSE;
    while (!LOOP_PCRE_TST)
    {
        pcre *re = NULL;
        pcre_extra *extra = NULL;
        const char *error = NULL;
        char *back_p = NULL;
        char *p = NULL;
        char *pp = NULL;
        char *ppp = NULL;
        const unsigned char *tables = NULL;
        int do_G = 0;
        int do_g = 0;
        int erroroffset = 0, len = 0, delimiter;

        LOOP_PCRE_TST = TRUE;
        p = strdup(INPUT_PAT);
        back_p = p;
        while (isspace(*p))
        {
            p++;
        }
        if (*p == 0)
        {
            continue;
        }
        /* In-line pattern (the usual case). Get the delimiter and seek the end of
        the pattern; if is isn't complete, read more. */

        delimiter = *p++;

        if (isalnum(delimiter) || delimiter == '\\')
        {
            if (buffer)
            {
                FREE(buffer);
                buffer = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
                offsets = NULL;
            }
            if (back_p)
            {
                FREE(back_p);
                back_p = NULL;
            }
            return DELIMITER_NOT_ALPHANUMERIC;
        }

        pp = p;

        while (*pp != 0)
        {
            if (*pp == '\\' && pp[1] != 0)
            {
                pp++;
            }
            else if (*pp == delimiter)
            {
                break;
            }
            pp++;
        }

        /* If the delimiter can't be found, it's a syntax error */
        if (*pp == 0)
        {
            if (buffer)
            {
                FREE(buffer);
                buffer = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
                offsets = NULL;
            }
            if (back_p)
            {
                FREE(back_p);
                back_p = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
            }
            return CAN_NOT_COMPILE_PATTERN;
        }

        /* If the first character after the delimiter is backslash, make
        the pattern end with backslash. This is purely to provide a way
        of testing for the error message when a pattern ends with backslash. */

        if (pp[1] == '\\')
        {
            *pp++ = '\\';
        }

        /* Terminate the pattern at the delimiter, and save a copy of the pattern
        for callouts. */

        *pp++ = 0;

        /* Look for options after final delimiter */

        //options = 8192;

        while (*pp != 0)
        {
            switch (*pp++)
            {
                case 'f':
                    options |= PCRE_FIRSTLINE;
                    break;
                case 'g':
                    do_g = 1;
                    break;
                case 'i':
                    options |= PCRE_CASELESS;
                    break;
                case 'm':
                    options |= PCRE_MULTILINE;
                    break;
                case 's':
                    options |= PCRE_DOTALL;
                    break;
                case 'x':
                    options |= PCRE_EXTENDED;
                    break;
                case '+':
                    break;
                case 'A':
                    options |= PCRE_ANCHORED;
                    break;
                case 'B':
                    break;
                case 'C':
                    options |= PCRE_AUTO_CALLOUT;
                    break;
                case 'D':
                    break;
                case 'E':
                    options |= PCRE_DOLLAR_ENDONLY;
                    break;
                case 'F':
                    break;
                case 'G':
                    do_G = 1;
                    break;
                case 'I':
                    break;
                case 'J':
                    options |= PCRE_DUPNAMES;
                    break;
                case 'M':
                    break;
                case 'N':
                    options |= PCRE_NO_AUTO_CAPTURE;
                    break;
                case 'S':
                    break;
                case 'U':
                    options |= PCRE_UNGREEDY;
                    break;
                case 'X':
                    options |= PCRE_EXTRA;
                    break;
                case 'Z':
                    break;
                case '8':
                {
                    int rc = 0;
                    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                    if (rc != 1)
                    {
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                        }
                        return UTF8_NOT_SUPPORTED;
                    }
                    options |= PCRE_UTF8;
                }
                break;
                case '?':
                    options |= PCRE_NO_UTF8_CHECK;
                    break;
                case 'L':
                    ppp = pp;
                    /* The '\r' test here is so that it works on Windows. */
                    /* The '0' test is just in case this is an unterminated line. */
                    while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
                    {
                        ppp++;
                    }
                    *ppp = 0;
                    if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
                    {
                        goto SKIP_DATA;
                    }

                    tables = pcre_maketables();
                    pp = ppp;
                    break;
                case '>':
                    while (*pp != 0)
                    {
                        pp++;
                    }
                    while (isspace(pp[-1]))
                    {
                        pp--;
                    }
                    *pp = 0;
                    break;
                case '<':
                {
                    while (*pp++ != '>')
                    {
                        ;
                    }
                }
                break;
                case '\r':                      /* So that it works in Windows */
                case '\n':
                case ' ':
                    break;

                default:
                    goto SKIP_DATA;
            }
        }

        /* Handle compiling via the POSIX interface, which doesn't support the
        timing, showing, or debugging options, nor the ability to pass over
        local character tables. */


        {
            re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
            /* Compilation failed; go back for another re, skipping to blank line
            if non-interactive. */
            if (re == NULL)
            {
SKIP_DATA:
                if (buffer)
                {
                    FREE(buffer);
                    buffer = NULL;
                }
                if (offsets)
                {
                    FREE(offsets);
                    offsets = NULL;
                }
                if (tables)
                {
                    (*pcre_free)((void*)tables);
                    tables = NULL;
                }
                if (extra)
                {
                    FREE(extra);
                    extra = NULL;
                }
                if (back_p)
                {
                    FREE(back_p);
                    back_p = NULL;
                }
                return CAN_NOT_COMPILE_PATTERN;
            }

        }        /* End of non-POSIX compile */

        /* Read data lines and test them */
        {
            char *q = NULL;
            char *bptr = NULL;
            int *use_offsets = offsets;
            int use_size_offsets = size_offsets;
            int callout_data = 0;
            int callout_data_set = 0;
            int count = 0;
            int c = 0;
            int copystrings = 0;
            int find_match_limit = 0;
            int getstrings = 0;
            int gmatched = 0;
            int start_offset = 0;
            int g_notempty = 0;
            int use_dfa = 0;

            options = 0;
            *copynames = 0;
            *getnames = 0;

            copynamesptr = copynames;
            getnamesptr = getnames;

            callout_count = 0;
            callout_fail_count = 999999;
            callout_fail_id = -1;

            if (extra != NULL)
            {
                extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
            }
            p = buffer;
            bptr = q = buffer;
            while ((c = *p++) != 0)
            {
                int i = 0;
                int n = 0;

                if (c == '\\') switch ((c = *p++))
                    {
                        case 'a':
                            c =    7;
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'e':
                            c =   27;
                            break;
                        case 'f':
                            c = '\f';
                            break;
                        case 'n':
                            c = '\n';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'v':
                            c = '\v';
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                            c -= '0';
                            while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
                            {
                                c = c * 8 + *p++ - '0';
                            }
                            break;
                        case 'x':
                            /* Ordinary \x */
                            c = 0;
                            while (i++ < 2 && isxdigit(*p))
                            {
                                c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
                                p++;
                            }
                            break;
                        case 0:   /* \ followed by EOF allows for an empty line */
                            p--;
                            continue;
                        case '>':
                            while (isdigit(*p))
                            {
                                start_offset = start_offset * 10 + *p++ - '0';
                            }
                            continue;
                        case 'A':  /* Option setting */
                            options |= PCRE_ANCHORED;
                            continue;
                        case 'B':
                            options |= PCRE_NOTBOL;
                            continue;
                        case 'C':
                            if (isdigit(*p))    /* Set copy string */
                            {
                                while (isdigit(*p))
                                {
                                    n = n * 10 + *p++ - '0';
                                }
                                copystrings |= 1 << n;
                            }
                            else if (isalnum(*p))
                            {
                                char *npp = copynamesptr;
                                while (isalnum(*p))
                                {
                                    *npp++ = *p++;
                                }
                                *npp++ = 0;
                                *npp = 0;
                                pcre_get_stringnumber(re, (char *)copynamesptr);
                                copynamesptr = npp;
                            }
                            else if (*p == '+')
                            {
                                p++;
                            }
                            else if (*p == '-')
                            {
                                p++;
                            }
                            else if (*p == '!')
                            {
                                callout_fail_id = 0;
                                p++;
                                while (isdigit(*p))
                                {
                                    callout_fail_id = callout_fail_id * 10 + *p++ - '0';
                                }
                                callout_fail_count = 0;
                                if (*p == '!')
                                {
                                    p++;
                                    while (isdigit(*p))
                                    {
                                        callout_fail_count = callout_fail_count * 10 + *p++ - '0';
                                    }
                                }
                            }
                            else if (*p == '*')
                            {
                                int sign = 1;
                                callout_data = 0;
                                if (*(++p) == '-')
                                {
                                    sign = -1;
                                    p++;
                                }
                                while (isdigit(*p))
                                {
                                    callout_data = callout_data * 10 + *p++ - '0';
                                }
                                callout_data *= sign;
                                callout_data_set = 1;
                            }
                            continue;
                        case 'G':
                            if (isdigit(*p))
                            {
                                while (isdigit(*p))
                                {
                                    n = n * 10 + *p++ - '0';
                                }
                                getstrings |= 1 << n;
                            }
                            else if (isalnum(*p))
                            {
                                char *npp = getnamesptr;
                                while (isalnum(*p))
                                {
                                    *npp++ = *p++;
                                }
                                *npp++ = 0;
                                *npp = 0;
                                pcre_get_stringnumber(re, (char *)getnamesptr);
                                getnamesptr = npp;
                            }
                            continue;
                        case 'L':
                            continue;
                        case 'M':
                            find_match_limit = 1;
                            continue;
                        case 'N':
                            options |= PCRE_NOTEMPTY;
                            continue;
                        case 'O':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (n > size_offsets_max)
                            {
                                size_offsets_max = n;
                                if (offsets)
                                {
                                    FREE(offsets);
                                }
                                use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
                            }
                            use_size_offsets = n;
                            if (n == 0)
                            {
                                use_offsets = NULL;    /* Ensures it can't write to it */
                            }
                            continue;
                        case 'P':
                            options |= PCRE_PARTIAL;
                            continue;
                        case 'Q':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (extra == NULL)
                            {
                                extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                                extra->flags = 0;
                            }
                            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
                            extra->match_limit_recursion = n;
                            continue;
                        case 'q':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (extra == NULL)
                            {
                                extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                                extra->flags = 0;
                            }
                            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
                            extra->match_limit = n;
                            continue;
#if !defined NODFA
                        case 'R':
                            options |= PCRE_DFA_RESTART;
                            continue;
#endif
                        case 'S':

                            continue;
                        case 'Z':
                            options |= PCRE_NOTEOL;
                            continue;
                        case '?':
                            options |= PCRE_NO_UTF8_CHECK;
                            continue;
                        case '<':
                        {
                            while (*p++ != '>')
                            {
                                ;
                            }
                        }
                        continue;
                    }
                *q++ = (char)c;
            }
            *q = 0;
            len = (int)(q - buffer);
            if ((all_use_dfa || use_dfa) && find_match_limit)
            {
                if (buffer)
                {
                    FREE(buffer);
                    buffer = NULL;
                }
                if (offsets)
                {
                    FREE(offsets);
                    offsets = NULL;
                }
                if (p)
                {
                    FREE(p);
                    p = NULL;
                }
                if (re)
                {
                    (*pcre_free)(re);
                    re = NULL;
                }
                if (tables)
                {
                    (*pcre_free)((void*)tables);
                    tables = NULL;
                }
                if (extra)
                {
                    FREE(extra);
                    extra = NULL;
                }
                return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
            }
            /* Handle matching via the POSIX interface, which does not
            support timing or playing with the match limit or callout data. */
            for (;; gmatched++)    /* Loop for /g or /G */
            {

                /* If find_match_limit is set, we want to do repeated matches with
                varying limits in order to find the minimum value for the match limit and
                for the recursion limit. */

                if (find_match_limit)
                {
                    if (extra == NULL)
                    {
                        extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                        extra->flags = 0;
                    }

                    (void)check_match_limit(re, extra, bptr, len, start_offset,
                                            options | g_notempty, use_offsets, use_size_offsets,
                                            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
                                            PCRE_ERROR_MATCHLIMIT);

                    count = check_match_limit(re, extra, bptr, len, start_offset,
                                              options | g_notempty, use_offsets, use_size_offsets,
                                              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
                                              PCRE_ERROR_RECURSIONLIMIT);
                }
                /* If callout_data is set, use the interface with additional data */
                else if (callout_data_set)
                {
                    if (extra == NULL)
                    {
                        extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                        extra->flags = 0;
                    }
                    extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
                    extra->callout_data = &callout_data;
                    count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
                                      options | g_notempty, use_offsets, use_size_offsets);

                    extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
                }
                /* The normal case is just to do the match once, with the default
                value of match_limit. */
                else
                {
                    count = pcre_exec(re, extra, (char *)bptr, len,
                                      start_offset, options | g_notempty, use_offsets, use_size_offsets);
                    if (count == 0)
                    {
                        count = use_size_offsets / 3;
                    }

                    //to retrieve backref count and values
                    if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
                    {
                        int i = 0;
                        int iErr = 0;

                        iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
                        //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);

                        if (*_piCapturedStringCount > 0)
                        {
                            *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
                            for (i = 0 ; i < *_piCapturedStringCount ; i++)
                            {
                                char* pstSubstring = NULL;
                                pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
                                if (pstSubstring != NULL)
                                {
                                    (*_pstCapturedString)[i] = strdup(pstSubstring);
                                }
                                pcre_free_substring(pstSubstring);
                            }
                        }
                    }
                }
                /* Matched */
                if (count >= 0)
                {
                    int i, maxcount;
                    maxcount = use_size_offsets / 3;
                    /* This is a check against a lunatic return value. */
                    if (count > maxcount)
                    {
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                            offsets = NULL;
                        }
                        if (re)
                        {
                            (*pcre_free)(re);
                            re = NULL;
                        }
                        if (tables)
                        {
                            (*pcre_free)((void*)tables);
                            tables = NULL;
                        }
                        if (extra)
                        {
                            FREE(extra);
                            extra = NULL;
                        }
                        if (back_p)
                        {
                            FREE(back_p);
                            back_p = NULL;
                        }
                        return TOO_BIG_FOR_OFFSET_SIZE;
                    }

                    for (i = 0; i < count * 2; i += 2)
                    {
                        if (use_offsets[i] >= 0)
                        {
                            *Output_Start = use_offsets[i];
                            *Output_End = use_offsets[i + 1];
                            if (buffer)
                            {
                                FREE(buffer);
                            }

                            /* use_offsets = offsets no need to free use_offsets if we free offsets */
                            if (offsets)
                            {
                                FREE(offsets);
                            }

                            /* "re" allocated by pcre_compile (better to use free function associated)*/
                            if (re)
                            {
                                (*pcre_free)(re);
                            }

                            if (extra)
                            {
                                FREE(extra);
                            }
                            if (tables)
                            {
                                /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
                                (*pcre_free)((void *)tables);
                                tables = NULL;
                                setlocale(LC_CTYPE, "C");
                            }

                            if (back_p)
                            {
                                FREE(back_p);
                                back_p = NULL;
                            }
                            return PCRE_FINISHED_OK;
                        }
                    }

                    for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
                    {
                        char copybuffer[256];
                        pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
                    }

                    for (i = 0; i < 32; i++)
                    {
                        if ((getstrings & (1 << i)) != 0)
                        {
                            const char *substring;
                            pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
                        }
                    }

                    for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
                    {
                        const char *substring;
                        pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
                    }

                }
                /* Failed to match. If this is a /g or /G loop and we previously set
                g_notempty after a null match, this is not necessarily the end. We want
                to advance the start offset, and continue. We won't be at the end of the
                string - that was checked before setting g_notempty.
                Complication arises in the case when the newline option is "any" or
                "anycrlf". If the previous match was at the end of a line terminated by
                CRLF, an advance of one character just passes the \r, whereas we should
                prefer the longer newline sequence, as does the code in pcre_exec().
                Fudge the offset value to achieve this.

                Otherwise, in the case of UTF-8 matching, the advance must be one
                character, not one byte. */
                else
                {
                    if (count == PCRE_ERROR_NOMATCH)
                    {
                        if (gmatched == 0)
                        {
                            if (tables)
                            {
                                (*pcre_free)((void *)tables);
                                tables = NULL;
                            }
                            if (re)
                            {
                                (*pcre_free)((void *)re);
                                re = NULL;
                            }
                            if (buffer)
                            {
                                FREE(buffer);
                                buffer = NULL;
                            }
                            if (offsets)
                            {
                                FREE(offsets);
                            }
                            if (p)
                            {
                                FREE(back_p);
                                back_p = NULL;
                            }
                            return NO_MATCH;
                        }
                    }

                    if (count == PCRE_ERROR_MATCHLIMIT )
                    {
                        if (tables)
                        {
                            (*pcre_free)((void *)tables);
                            tables = NULL;
                        }
                        if (re)
                        {
                            (*pcre_free)((void *)re);
                            re = NULL;
                        }
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                            offsets = NULL;
                        }
                        if (back_p)
                        {
                            FREE(back_p);
                            back_p = NULL;
                        }
                        return MATCH_LIMIT;
                    }
                    break;  /* Out of loop */
                }

                /* If not /g or /G we are done */
                if (!do_g && !do_G)
                {
                    break;
                }

                /* If we have matched an empty string, first check to see if we are at
                the end of the subject. If so, the /g loop is over. Otherwise, mimic
                what Perl's /g options does. This turns out to be rather cunning. First
                we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
                same point. If this fails (picked up above) we advance to the next
                character. */

                g_notempty = 0;

                if (use_offsets[0] == use_offsets[1])
                {
                    if (use_offsets[0] == len)
                    {
                        break;
                    }
                    g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
                }

                /* For /g, update the start offset, leaving the rest alone */

                if (do_g)
                {
                    start_offset = use_offsets[1];
                }
                /* For /G, update the pointer and length */
                else
                {
                    bptr += use_offsets[1];
                    len -= use_offsets[1];
                }
            }  /* End of loop for /g and /G */

            if (re)
            {
                (*pcre_free)(re);
                re = NULL;
            }
            if (extra)
            {
                FREE(extra);
                extra = NULL;
            }
            if (tables)
            {
                (*pcre_free)((void *)tables);
                tables = NULL;
            }

            FREE(back_p);
            back_p = NULL;
            continue;
        }    /* End of loop for data lines */
    }

    if (buffer)
    {
        FREE(buffer);
        buffer = NULL;
    }
    if (offsets)
    {
        FREE(offsets);
        offsets = NULL;
    }

    return PCRE_EXIT;
}
Ejemplo n.º 26
0
int main(int argc, char **argv) {
    set_log_level(LOG_LEVEL_WARN);

    char **paths = NULL;
    int i;
    int pcre_opts = PCRE_MULTILINE;
    int study_opts = 0;
    const char *pcre_err = NULL;
    int pcre_err_offset = 0;
    pcre *re = NULL;
    pcre_extra *re_extra = NULL;
    double time_diff = 0.0;

    memset(&stats, 0, sizeof(stats)); /* What's the point of an init function if it's going to be this one-liner? */

    gettimeofday(&(stats.time_start), NULL);

    parse_options(argc, argv, &paths);

    log_debug("PCRE Version: %s", pcre_version());

    if (opts.casing == CASE_INSENSITIVE) {
        pcre_opts = pcre_opts | PCRE_CASELESS;
    }

    if (!is_regex(opts.query)) {
        /* No special chars. Do a literal match */
        opts.literal = 1;
    }

    if (opts.literal) {
        generate_skip_lookup(opts.query, opts.query_len, skip_lookup, opts.casing == CASE_SENSITIVE);
    }
    else {
        re = pcre_compile(opts.query, pcre_opts, &pcre_err, &pcre_err_offset, NULL);
        if (re == NULL) {
            log_err("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err);
            exit(1);
        }

#ifdef USE_PCRE_JIT
        int has_jit = 0;
        pcre_config(PCRE_CONFIG_JIT, &has_jit);
        if (has_jit) {
            study_opts |= PCRE_STUDY_JIT_COMPILE;
        }
#endif

        re_extra = pcre_study(re, study_opts, &pcre_err);
        if (re_extra == NULL) {
            log_debug("pcre_study returned nothing useful. Error: %s", pcre_err);
        }
    }

    if (opts.search_stream) {
        search_stdin(re, re_extra);
    }
    else {
        for (i = 0; paths[i] != NULL; i++) {
            log_debug("searching path %s for %s", paths[i], opts.query);
            search_dir(re, re_extra, paths[i], 0);
            free(paths[i]);
        }
    }

    if (opts.stats) {
        gettimeofday(&(stats.time_end), NULL);
        time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
                    ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
        time_diff = time_diff / 1000000;

        printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff);
    }

    pcre_free(re);
    pcre_free(re_extra); /* Using pcre_free_study here segfaults on some versions of PCRE */
    free(paths);
    cleanup_ignore_patterns();

    return(0);
}