Exemplo n.º 1
0
bool tokener::copy_regex(std::string & value, int & pcre_flags)
{
	if ( ! is_regex()) return false;
	size_t ix = line.find('/', ix_cur+1);
	if (ix == std::string::npos)
		return false;

	ix_cur += 1; // skip leading /
	cch = ix - ix_cur;
	value = line.substr(ix_cur, cch); // return value between //'s
	ix_next = ix+1; // skip trailing /
	ix = line.find_first_of(sep, ix_next);
	if (ix == std::string::npos) { ix = line.size(); }

	// regex options will follow right after, or they will not exist.
	pcre_flags = 0;
	while (ix_next < ix) {
		switch (line[ix_next++]) {
		case 'g': pcre_flags |= 0x80000000; break;
		case 'm': pcre_flags |= PCRE_MULTILINE; break;
		case 'i': pcre_flags |= PCRE_CASELESS; break;
		case 'U': pcre_flags |= PCRE_UNGREEDY; break;
		default: return false;
		}
	}
	return true;
}
Exemplo n.º 2
0
    inline compare_result<typename Range::const_iterator>
    compare(
        Range const& in
      , Range const& tem
      , char const* re_prefix
      , char const* re_suffix
    )
    {
       typedef typename Range::const_iterator iter_t;
       typedef compare_result<iter_t> compare_result_t;

       iter_t in_first = in.begin();
       iter_t in_last = in.end();
       iter_t tem_first = tem.begin();
       iter_t tem_last = tem.end();
       std::string re;

       while (in_first != in_last && tem_first != tem_last)
       {
            if (is_regex(tem_first, tem_last, re, re_prefix, re_suffix))
            {
                boost::match_results<iter_t> what;
                boost::regex e(re);
                if (!boost::regex_search(
                    in_first, in_last, what, e
                  , boost::match_default | boost::match_continuous))
                {
                    // RE mismatch: exit now.
                    return compare_result_t(in_first, false);
                }
                else
                {
                    // RE match: gobble the matching string.
                    in_first = what[0].second;
                }
            }
            else
            {
                // Char by char comparison. Exit if we have a mismatch.
                if (*in_first++ != *tem_first++)
                    return compare_result_t(in_first, false);
            }
        }

        // Ignore trailing spaces in template
        bool has_trailing_nonspaces = false;
        while (tem_first != tem_last)
        {
            if (!std::isspace(*tem_first++))
            {
                has_trailing_nonspaces = true;
                break;
            }
        }
        while (in_first != in_last)
        {
            if (!std::isspace(*in_first++))
            {
                has_trailing_nonspaces = true;
                break;
            }
        }
        // return a full match only if the template is fully matched and if there
        // are no more characters to match in the source
        return compare_result_t(in_first, !has_trailing_nonspaces);
   }
Exemplo n.º 3
0
void parse_options(int argc, char **argv, char **paths[]) {
    int ch;
    int i;
    int path_len = 0;
    int useless = 0;
    int group = 1;
    int help = 0;
    int version = 0;
    int opt_index = 0;
    const char *home_dir = getenv("HOME");
    char *ignore_file_path = NULL;
    int needs_query = 1;

    init_options();

    struct option longopts[] = {
        { "ackmate", no_argument, &(opts.ackmate), 1 },
        { "ackmate-dir-filter", required_argument, NULL, 0 },
        { "after", required_argument, NULL, 'A' },
        { "all-text", no_argument, NULL, 't' },
        { "all-types", no_argument, NULL, 'a' },
        { "before", required_argument, NULL, 'B' },
        { "break", no_argument, &(opts.print_break), 1 },
        { "case-sensitive", no_argument, NULL, 's' },
        { "color", no_argument, &(opts.color), 1 },
        { "column", no_argument, &(opts.column), 1 },
        { "context", optional_argument, NULL, 'C' },
        { "debug", no_argument, NULL, 'D' },
        { "depth", required_argument, NULL, 0 },
        { "file-search-regex", required_argument, NULL, 'G' },
        { "files-with-matches", no_argument, NULL, 'l' },
        { "files-without-matches", no_argument, NULL, 'L' },
        { "follow", no_argument, &(opts.follow_symlinks), 1 },
        { "group", no_argument, &(group), 1 },
        { "heading", no_argument, &(opts.print_heading), 1 },
        { "help", no_argument, NULL, 'h' },
        { "hidden", no_argument, &(opts.search_hidden_files), 1 },
        { "ignore", required_argument, NULL, 0 },
        { "ignore-case", no_argument, NULL, 'i' },
        { "invert-match", no_argument, &(opts.invert_match), 1 },
        { "literal", no_argument, NULL, 'Q' },
        { "match", no_argument, &useless, 0 },
        { "max-count", required_argument, NULL, 'm' },
        { "no-recurse", no_argument, NULL, 'n' },
        { "nobreak", no_argument, &(opts.print_break), 0 },
        { "nocolor", no_argument, &(opts.color), 0 },
        { "nofollow", no_argument, &(opts.follow_symlinks), 0 },
        { "nogroup", no_argument, &(group), 0 },
        { "noheading", no_argument, &(opts.print_heading), 0 },
        { "parallel", no_argument, &(opts.parallel), 1},
        { "path-to-agignore", required_argument, NULL, 'p'},
        { "print-long-lines", no_argument, &(opts.print_long_lines), 1 },
        { "recurse", no_argument, NULL, 'r' },
        { "search-binary", no_argument, &(opts.search_binary_files), 1 },
        { "search-files", no_argument, &(opts.search_stream), 0 },
        { "skip-vcs-ignores", no_argument, NULL, 'U' },
        { "smart-case", no_argument, NULL, 'S' },
        { "stats", no_argument, &(opts.stats), 1 },
        { "unrestricted", no_argument, NULL, 'u' },
        { "version", no_argument, &version, 1 },
        { "word-regexp", no_argument, NULL, 'w' },
        { "workers", required_argument, NULL, 0 },
        { NULL, 0, NULL, 0 }
    };

    if (argc < 2) {
        usage();
        exit(1);
    }

    /* stdin isn't a tty. something's probably being piped to ag */
    if (!isatty(fileno(stdin))) {
        opts.search_stream = 1;
    }

    /* If we're not outputting to a terminal. change output to:
        * turn off colors
        * print filenames on every line
     */
    if (!isatty(fileno(stdout))) {
        opts.color = 0;
        group = 0;
    }

    while ((ch = getopt_long(argc, argv, "A:aB:C:DG:g:fhiLlm:np:QRrSsvVtuUw", longopts, &opt_index)) != -1) {
        switch (ch) {
            case 'A':
                opts.after = atoi(optarg);
                break;
            case 'a':
                opts.search_all_files = 1;
                opts.search_binary_files = 1;
                break;
            case 'B':
                opts.before = atoi(optarg);
                break;
            case 'C':
                if (optarg) {
                    opts.context = atoi(optarg);
                    if (opts.context == 0 && errno == EINVAL) {
                        /* This arg must be the search string instead of the context length */
                        optind--;
                        opts.context = DEFAULT_CONTEXT_LEN;
                    }
                }
                else {
                    opts.context = DEFAULT_CONTEXT_LEN;
                }
                break;
            case 'D':
                set_log_level(LOG_LEVEL_DEBUG);
                break;
            case 'f':
                opts.follow_symlinks = 1;
                break;
            case 'g':
                needs_query = 0;
                opts.match_files = 1;
                /* Fall through and build regex */
            case 'G':
                compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, optarg, 0, 0);
                break;
            case 'h':
                help = 1;
                break;
            case 'i':
                opts.casing = CASE_INSENSITIVE;
                break;
            case 'L':
                opts.invert_match = 1;
                /* fall through */
            case 'l':
                opts.print_filename_only = 1;
                break;
            case 'm':
                opts.max_matches_per_file = atoi(optarg);
                break;
            case 'n':
                opts.recurse_dirs = 0;
                break;
            case 'p':
                opts.path_to_agignore = optarg;
                break;
            case 'Q':
                opts.literal = 1;
                break;
            case 'R':
            case 'r':
                opts.recurse_dirs = 1;
                break;
            case 'S':
                opts.casing = CASE_SMART;
                break;
            case 's':
                opts.casing = CASE_SENSITIVE;
                break;
            case 't':
                opts.search_all_files = 1;
                break;
            case 'u':
                opts.search_binary_files = 1;
                opts.search_all_files = 1;
                opts.search_hidden_files = 1;
                break;
            case 'U':
                opts.skip_vcs_ignores = 1;
                break;
            case 'v':
                opts.invert_match = 1;
                break;
            case 'V':
                version = 1;
                break;
            case 'w':
                opts.word_regexp = 1;
                break;
            case 0: /* Long option */
                if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) {
                    compile_study(&opts.ackmate_dir_filter, &opts.ackmate_dir_filter_extra, optarg, 0, 0);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "depth") == 0) {
                    opts.max_search_depth = atoi(optarg);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "ignore") == 0) {
                    add_ignore_pattern(root_ignores, optarg);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "workers") == 0) {
                    opts.workers = atoi(optarg);
                    break;
                }
                /* Continue to usage if we don't recognize the option */
                if (longopts[opt_index].flag != 0) {
                    break;
                }
                log_err("option %s does not take a value", longopts[opt_index].name);
            default:
                usage();
                exit(1);
        }
    }

    argc -= optind;
    argv += optind;

    if (help) {
        usage();
        exit(0);
    }

    if (version) {
        print_version();
        exit(0);
    }

    if (needs_query && argc == 0) {
        log_err("What do you want to search for?");
        exit(1);
    }

    if (home_dir && !opts.search_all_files) {
        log_debug("Found user's home dir: %s", home_dir);
        asprintf(&ignore_file_path, "%s/%s", home_dir, ignore_pattern_files[0]);
        load_ignore_patterns(root_ignores, ignore_file_path);
        free(ignore_file_path);
    }

    if (opts.context > 0) {
        opts.before = opts.context;
        opts.after = opts.context;
    }

    if (opts.ackmate) {
        opts.color = 0;
        opts.print_break = 1;
        group = 1;
        opts.search_stream = 0;
    }

    if (opts.parallel) {
        opts.search_stream = 0;
    }

    if (opts.print_heading == 0 || opts.print_break == 0) {
        goto skip_group;
    }

    if (group) {
        opts.print_heading = 1;
        opts.print_break = 1;
    }
    else {
        opts.print_heading = 0;
        opts.print_break = 0;
    }

    skip_group:;

    if (opts.search_stream) {
        opts.print_break = 0;
        opts.print_heading = 0;
        opts.print_line_numbers = 0;
    }

    if (needs_query) {
        opts.query = strdup(argv[0]);
        argc--;
        argv++;
    }
    else {
        opts.query = strdup(".");
    }
    opts.query_len = strlen(opts.query);

    log_debug("Query is %s", opts.query);

    if (opts.query_len == 0) {
        log_err("Error: No query. What do you want to search for?");
        exit(1);
    }

    if (!is_regex(opts.query)) {
        opts.literal = 1;
    }

    char *path = NULL;
    opts.paths_len = argc;
    if (argc > 0) {
        *paths = calloc(sizeof(char*), argc + 1);
        for (i = 0; i < argc; i++) {
            path = strdup(argv[i]);
            path_len = strlen(path);
            /* kill trailing slash */
            if (path_len > 1 && path[path_len - 1] == '/') {
              path[path_len - 1] = '\0';
            }
            (*paths)[i] = path;
        }
        (*paths)[i] = NULL;
        /* Make sure we search these paths instead of stdin. */
        opts.search_stream = 0;
    }
    else {
        path = strdup(".");
        *paths = malloc(sizeof(char*) * 2);
        (*paths)[0] = path;
        (*paths)[1] = NULL;
    }
}
Exemplo n.º 4
0
int main(int argc, char **argv) {
    set_log_level(LOG_LEVEL_WARN);

    char **paths = NULL;
    int i;
    int pcre_opts = PCRE_MULTILINE;
    int study_opts = 0;
    const char *pcre_err = NULL;
    int pcre_err_offset = 0;
    pcre *re = NULL;
    pcre_extra *re_extra = NULL;
    double time_diff = 0.0;

    memset(&stats, 0, sizeof(stats)); /* What's the point of an init function if it's going to be this one-liner? */

    gettimeofday(&(stats.time_start), NULL);

    parse_options(argc, argv, &paths);

    log_debug("PCRE Version: %s", pcre_version());

    if (opts.casing == CASE_INSENSITIVE) {
        pcre_opts = pcre_opts | PCRE_CASELESS;
    }

    if (!is_regex(opts.query)) {
        /* No special chars. Do a literal match */
        opts.literal = 1;
    }

    if (opts.literal) {
        generate_skip_lookup(opts.query, opts.query_len, skip_lookup, opts.casing == CASE_SENSITIVE);
    }
    else {
        re = pcre_compile(opts.query, pcre_opts, &pcre_err, &pcre_err_offset, NULL);
        if (re == NULL) {
            log_err("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err);
            exit(1);
        }

#ifdef USE_PCRE_JIT
        int has_jit = 0;
        pcre_config(PCRE_CONFIG_JIT, &has_jit);
        if (has_jit) {
            study_opts |= PCRE_STUDY_JIT_COMPILE;
        }
#endif

        re_extra = pcre_study(re, study_opts, &pcre_err);
        if (re_extra == NULL) {
            log_debug("pcre_study returned nothing useful. Error: %s", pcre_err);
        }
    }

    if (opts.search_stream) {
        search_stdin(re, re_extra);
    }
    else {
        for (i = 0; paths[i] != NULL; i++) {
            log_debug("searching path %s for %s", paths[i], opts.query);
            search_dir(re, re_extra, paths[i], 0);
            free(paths[i]);
        }
    }

    if (opts.stats) {
        gettimeofday(&(stats.time_end), NULL);
        time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
                    ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
        time_diff = time_diff / 1000000;

        printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff);
    }

    pcre_free(re);
    pcre_free(re_extra); /* Using pcre_free_study here segfaults on some versions of PCRE */
    free(paths);
    cleanup_ignore_patterns();

    return(0);
}