bool tokener::copy_regex(std::string & value, int & pcre_flags) { if ( ! is_regex()) return false; size_t ix = line.find('/', ix_cur+1); if (ix == std::string::npos) return false; ix_cur += 1; // skip leading / cch = ix - ix_cur; value = line.substr(ix_cur, cch); // return value between //'s ix_next = ix+1; // skip trailing / ix = line.find_first_of(sep, ix_next); if (ix == std::string::npos) { ix = line.size(); } // regex options will follow right after, or they will not exist. pcre_flags = 0; while (ix_next < ix) { switch (line[ix_next++]) { case 'g': pcre_flags |= 0x80000000; break; case 'm': pcre_flags |= PCRE_MULTILINE; break; case 'i': pcre_flags |= PCRE_CASELESS; break; case 'U': pcre_flags |= PCRE_UNGREEDY; break; default: return false; } } return true; }
// Compares the text in `in` against the template `tem`.
//
// The template is walked alongside the input. Wherever is_regex() reports an
// embedded regular expression (delimited by `re_prefix` / `re_suffix`), the
// expression it stores in `re` is matched against the input at the current
// position and the matched text is skipped. Everywhere else the two ranges are
// compared character by character.
// NOTE(review): is_regex() presumably advances `tem_first` past the embedded
// expression - confirm against its definition.
//
// Returns a compare_result built from (input position, matched?):
//   - regex mismatch: position of the failed match attempt, false;
//   - character mismatch: position one past the differing input character,
//     false;
//   - otherwise: true only if nothing but whitespace remains in both the
//     template and the input.
//
// NOTE(review): `Range` has to be a template parameter; the `template <...>`
// header is not part of the text visible here.
inline compare_result<typename Range::const_iterator>
compare(
    Range const& in
  , Range const& tem
  , char const* re_prefix
  , char const* re_suffix
)
{
    typedef typename Range::const_iterator iter_t;
    typedef compare_result<iter_t> compare_result_t;

    iter_t in_first = in.begin();
    iter_t in_last = in.end();
    iter_t tem_first = tem.begin();
    iter_t tem_last = tem.end();
    std::string re;

    while (in_first != in_last && tem_first != tem_last)
    {
        if (is_regex(tem_first, tem_last, re, re_prefix, re_suffix))
        {
            boost::match_results<iter_t> what;
            boost::regex e(re);
            // match_continuous anchors the match at in_first.
            if (!boost::regex_search(
                in_first, in_last, what, e
              , boost::match_default | boost::match_continuous))
            {
                // RE mismatch: exit now.
                return compare_result_t(in_first, false);
            }
            else
            {
                // RE match: gobble the matching string.
                in_first = what[0].second;
            }
        }
        else
        {
            // Char by char comparison. Exit if we have a mismatch.
            if (*in_first++ != *tem_first++)
                return compare_result_t(in_first, false);
        }
    }

    // Ignore trailing spaces in template. The unsigned char cast keeps
    // std::isspace well defined for bytes with the high bit set.
    bool has_trailing_nonspaces = false;
    while (tem_first != tem_last)
    {
        if (!std::isspace(static_cast<unsigned char>(*tem_first++)))
        {
            has_trailing_nonspaces = true;
            break;
        }
    }
    // Likewise for whatever is left of the input.
    while (in_first != in_last)
    {
        if (!std::isspace(static_cast<unsigned char>(*in_first++)))
        {
            has_trailing_nonspaces = true;
            break;
        }
    }

    // return a full match only if the template is fully matched and if there
    // are no more characters to match in the source
    return compare_result_t(in_first, !has_trailing_nonspaces);
}
void parse_options(int argc, char **argv, char **paths[]) { int ch; int i; int path_len = 0; int useless = 0; int group = 1; int help = 0; int version = 0; int opt_index = 0; const char *home_dir = getenv("HOME"); char *ignore_file_path = NULL; int needs_query = 1; init_options(); struct option longopts[] = { { "ackmate", no_argument, &(opts.ackmate), 1 }, { "ackmate-dir-filter", required_argument, NULL, 0 }, { "after", required_argument, NULL, 'A' }, { "all-text", no_argument, NULL, 't' }, { "all-types", no_argument, NULL, 'a' }, { "before", required_argument, NULL, 'B' }, { "break", no_argument, &(opts.print_break), 1 }, { "case-sensitive", no_argument, NULL, 's' }, { "color", no_argument, &(opts.color), 1 }, { "column", no_argument, &(opts.column), 1 }, { "context", optional_argument, NULL, 'C' }, { "debug", no_argument, NULL, 'D' }, { "depth", required_argument, NULL, 0 }, { "file-search-regex", required_argument, NULL, 'G' }, { "files-with-matches", no_argument, NULL, 'l' }, { "files-without-matches", no_argument, NULL, 'L' }, { "follow", no_argument, &(opts.follow_symlinks), 1 }, { "group", no_argument, &(group), 1 }, { "heading", no_argument, &(opts.print_heading), 1 }, { "help", no_argument, NULL, 'h' }, { "hidden", no_argument, &(opts.search_hidden_files), 1 }, { "ignore", required_argument, NULL, 0 }, { "ignore-case", no_argument, NULL, 'i' }, { "invert-match", no_argument, &(opts.invert_match), 1 }, { "literal", no_argument, NULL, 'Q' }, { "match", no_argument, &useless, 0 }, { "max-count", required_argument, NULL, 'm' }, { "no-recurse", no_argument, NULL, 'n' }, { "nobreak", no_argument, &(opts.print_break), 0 }, { "nocolor", no_argument, &(opts.color), 0 }, { "nofollow", no_argument, &(opts.follow_symlinks), 0 }, { "nogroup", no_argument, &(group), 0 }, { "noheading", no_argument, &(opts.print_heading), 0 }, { "parallel", no_argument, &(opts.parallel), 1}, { "path-to-agignore", required_argument, NULL, 'p'}, { "print-long-lines", no_argument, 
&(opts.print_long_lines), 1 }, { "recurse", no_argument, NULL, 'r' }, { "search-binary", no_argument, &(opts.search_binary_files), 1 }, { "search-files", no_argument, &(opts.search_stream), 0 }, { "skip-vcs-ignores", no_argument, NULL, 'U' }, { "smart-case", no_argument, NULL, 'S' }, { "stats", no_argument, &(opts.stats), 1 }, { "unrestricted", no_argument, NULL, 'u' }, { "version", no_argument, &version, 1 }, { "word-regexp", no_argument, NULL, 'w' }, { "workers", required_argument, NULL, 0 }, { NULL, 0, NULL, 0 } }; if (argc < 2) { usage(); exit(1); } /* stdin isn't a tty. something's probably being piped to ag */ if (!isatty(fileno(stdin))) { opts.search_stream = 1; } /* If we're not outputting to a terminal. change output to: * turn off colors * print filenames on every line */ if (!isatty(fileno(stdout))) { opts.color = 0; group = 0; } while ((ch = getopt_long(argc, argv, "A:aB:C:DG:g:fhiLlm:np:QRrSsvVtuUw", longopts, &opt_index)) != -1) { switch (ch) { case 'A': opts.after = atoi(optarg); break; case 'a': opts.search_all_files = 1; opts.search_binary_files = 1; break; case 'B': opts.before = atoi(optarg); break; case 'C': if (optarg) { opts.context = atoi(optarg); if (opts.context == 0 && errno == EINVAL) { /* This arg must be the search string instead of the context length */ optind--; opts.context = DEFAULT_CONTEXT_LEN; } } else { opts.context = DEFAULT_CONTEXT_LEN; } break; case 'D': set_log_level(LOG_LEVEL_DEBUG); break; case 'f': opts.follow_symlinks = 1; break; case 'g': needs_query = 0; opts.match_files = 1; /* Fall through and build regex */ case 'G': compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, optarg, 0, 0); break; case 'h': help = 1; break; case 'i': opts.casing = CASE_INSENSITIVE; break; case 'L': opts.invert_match = 1; /* fall through */ case 'l': opts.print_filename_only = 1; break; case 'm': opts.max_matches_per_file = atoi(optarg); break; case 'n': opts.recurse_dirs = 0; break; case 'p': opts.path_to_agignore = optarg; 
break; case 'Q': opts.literal = 1; break; case 'R': case 'r': opts.recurse_dirs = 1; break; case 'S': opts.casing = CASE_SMART; break; case 's': opts.casing = CASE_SENSITIVE; break; case 't': opts.search_all_files = 1; break; case 'u': opts.search_binary_files = 1; opts.search_all_files = 1; opts.search_hidden_files = 1; break; case 'U': opts.skip_vcs_ignores = 1; break; case 'v': opts.invert_match = 1; break; case 'V': version = 1; break; case 'w': opts.word_regexp = 1; break; case 0: /* Long option */ if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) { compile_study(&opts.ackmate_dir_filter, &opts.ackmate_dir_filter_extra, optarg, 0, 0); break; } else if (strcmp(longopts[opt_index].name, "depth") == 0) { opts.max_search_depth = atoi(optarg); break; } else if (strcmp(longopts[opt_index].name, "ignore") == 0) { add_ignore_pattern(root_ignores, optarg); break; } else if (strcmp(longopts[opt_index].name, "workers") == 0) { opts.workers = atoi(optarg); break; } /* Continue to usage if we don't recognize the option */ if (longopts[opt_index].flag != 0) { break; } log_err("option %s does not take a value", longopts[opt_index].name); default: usage(); exit(1); } } argc -= optind; argv += optind; if (help) { usage(); exit(0); } if (version) { print_version(); exit(0); } if (needs_query && argc == 0) { log_err("What do you want to search for?"); exit(1); } if (home_dir && !opts.search_all_files) { log_debug("Found user's home dir: %s", home_dir); asprintf(&ignore_file_path, "%s/%s", home_dir, ignore_pattern_files[0]); load_ignore_patterns(root_ignores, ignore_file_path); free(ignore_file_path); } if (opts.context > 0) { opts.before = opts.context; opts.after = opts.context; } if (opts.ackmate) { opts.color = 0; opts.print_break = 1; group = 1; opts.search_stream = 0; } if (opts.parallel) { opts.search_stream = 0; } if (opts.print_heading == 0 || opts.print_break == 0) { goto skip_group; } if (group) { opts.print_heading = 1; opts.print_break = 1; } else { 
opts.print_heading = 0; opts.print_break = 0; } skip_group:; if (opts.search_stream) { opts.print_break = 0; opts.print_heading = 0; opts.print_line_numbers = 0; } if (needs_query) { opts.query = strdup(argv[0]); argc--; argv++; } else { opts.query = strdup("."); } opts.query_len = strlen(opts.query); log_debug("Query is %s", opts.query); if (opts.query_len == 0) { log_err("Error: No query. What do you want to search for?"); exit(1); } if (!is_regex(opts.query)) { opts.literal = 1; } char *path = NULL; opts.paths_len = argc; if (argc > 0) { *paths = calloc(sizeof(char*), argc + 1); for (i = 0; i < argc; i++) { path = strdup(argv[i]); path_len = strlen(path); /* kill trailing slash */ if (path_len > 1 && path[path_len - 1] == '/') { path[path_len - 1] = '\0'; } (*paths)[i] = path; } (*paths)[i] = NULL; /* Make sure we search these paths instead of stdin. */ opts.search_stream = 0; } else { path = strdup("."); *paths = malloc(sizeof(char*) * 2); (*paths)[0] = path; (*paths)[1] = NULL; } }
int main(int argc, char **argv) { set_log_level(LOG_LEVEL_WARN); char **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; const char *pcre_err = NULL; int pcre_err_offset = 0; pcre *re = NULL; pcre_extra *re_extra = NULL; double time_diff = 0.0; memset(&stats, 0, sizeof(stats)); /* What's the point of an init function if it's going to be this one-liner? */ gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &paths); log_debug("PCRE Version: %s", pcre_version()); if (opts.casing == CASE_INSENSITIVE) { pcre_opts = pcre_opts | PCRE_CASELESS; } if (!is_regex(opts.query)) { /* No special chars. Do a literal match */ opts.literal = 1; } if (opts.literal) { generate_skip_lookup(opts.query, opts.query_len, skip_lookup, opts.casing == CASE_SENSITIVE); } else { re = pcre_compile(opts.query, pcre_opts, &pcre_err, &pcre_err_offset, NULL); if (re == NULL) { log_err("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err); exit(1); } #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif re_extra = pcre_study(re, study_opts, &pcre_err); if (re_extra == NULL) { log_debug("pcre_study returned nothing useful. 
Error: %s", pcre_err); } } if (opts.search_stream) { search_stdin(re, re_extra); } else { for (i = 0; paths[i] != NULL; i++) { log_debug("searching path %s for %s", paths[i], opts.query); search_dir(re, re_extra, paths[i], 0); free(paths[i]); } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff = time_diff / 1000000; printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } pcre_free(re); pcre_free(re_extra); /* Using pcre_free_study here segfaults on some versions of PCRE */ free(paths); cleanup_ignore_patterns(); return(0); }