/* TODO: Append matches to some data structure instead of just printing them out. * Then ag can have sweet summaries of matches/files scanned/time/etc. */ void search_dir(ignores *ig, const char *base_path, const char *path, const int depth) { struct dirent **dir_list = NULL; struct dirent *dir = NULL; scandir_baton_t scandir_baton; int results = 0; char *dir_full_path = NULL; const char *ignore_file = NULL; int i; /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */ for (i = 0; opts.skip_vcs_ignores ? (i == 0) : (ignore_pattern_files[i] != NULL); i++) { ignore_file = ignore_pattern_files[i]; ag_asprintf(&dir_full_path, "%s/%s", path, ignore_file); if (strcmp(SVN_DIR, ignore_file) == 0) { load_svn_ignore_patterns(ig, dir_full_path); } else { load_ignore_patterns(ig, dir_full_path); } free(dir_full_path); dir_full_path = NULL; } if (opts.path_to_agignore) { load_ignore_patterns(ig, opts.path_to_agignore); } scandir_baton.ig = ig; scandir_baton.base_path = base_path; results = ag_scandir(path, &dir_list, &filename_filter, &scandir_baton); if (results == 0) { log_debug("No results found in directory %s", path); goto search_dir_cleanup; } else if (results == -1) { if (errno == ENOTDIR) { /* Not a directory. Probably a file. */ /* If we're only searching one file, don't print the filename header at the top. */ if (depth == 0 && opts.paths_len == 1) { opts.print_heading = -1; } search_file(path); } else { log_err("Error opening directory %s: %s", path, strerror(errno)); } goto search_dir_cleanup; } int offset_vector[3]; int rc = 0; work_queue_t *queue_item; for (i = 0; i < results; i++) { queue_item = NULL; dir = dir_list[i]; ag_asprintf(&dir_full_path, "%s/%s", path, dir->d_name); /* If a link points to a directory then we need to treat it as a directory. */ if (!opts.follow_symlinks && is_symlink(path, dir)) { log_debug("File %s ignored becaused it's a symlink", dir->d_name); goto cleanup; } if (!is_directory(path, dir)) { if (opts.file_search_regex) { rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path), 0, 0, offset_vector, 3); if (rc < 0) { /* no match */ log_debug("Skipping %s due to file_search_regex.", dir_full_path); goto cleanup; } else if (opts.match_files) { log_debug("match_files: file_search_regex matched for %s.", dir_full_path); pthread_mutex_lock(&print_mtx); print_path(dir_full_path, '\n'); pthread_mutex_unlock(&print_mtx); goto cleanup; } } queue_item = ag_malloc(sizeof(work_queue_t)); queue_item->path = dir_full_path; queue_item->next = NULL; pthread_mutex_lock(&work_queue_mtx); if (work_queue_tail == NULL) { work_queue = queue_item; } else { work_queue_tail->next = queue_item; } work_queue_tail = queue_item; pthread_mutex_unlock(&work_queue_mtx); pthread_cond_signal(&files_ready); log_debug("%s added to work queue", dir_full_path); } else if (opts.recurse_dirs) { if (depth < opts.max_search_depth) { log_debug("Searching dir %s", dir_full_path); ignores *child_ig = init_ignore(ig); search_dir(child_ig, base_path, dir_full_path, depth + 1); cleanup_ignore(child_ig); } else { log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path); } } cleanup: ; free(dir); dir = NULL; if (queue_item == NULL) { free(dir_full_path); dir_full_path = NULL; } } search_dir_cleanup: ; free(dir_list); dir_list = NULL; }
int main(int argc, char **argv) { char **base_paths = NULL; char **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; double time_diff; worker_t *workers = NULL; int workers_len; int num_cores; #ifdef KJK_BUILD extern void setup_crash_handler(); /* in kjk_crash_handler.cpp */ setup_crash_handler(); #endif set_log_level(LOG_LEVEL_WARN); work_queue = NULL; work_queue_tail = NULL; memset(&stats, 0, sizeof(stats)); root_ignores = init_ignore(NULL, "", 0); out_fd = stdout; #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &base_paths, &paths); log_debug("PCRE Version: %s", pcre_version()); #ifdef _WIN32 { SYSTEM_INFO si; GetSystemInfo(&si); num_cores = si.dwNumberOfProcessors; } #else num_cores = (int)sysconf(_SC_NPROCESSORS_ONLN); #endif workers_len = num_cores; if (opts.literal) { workers_len--; } if (opts.workers) { workers_len = opts.workers; } if (workers_len < 1) { workers_len = 1; } log_debug("Using %i workers", workers_len); done_adding_files = FALSE; workers = (worker_t *) ag_calloc(workers_len, sizeof(worker_t)); if (pthread_cond_init(&files_ready, NULL)) { die("pthread_cond_init failed!"); } if (pthread_mutex_init(&print_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (pthread_mutex_init(&stats_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (pthread_mutex_init(&work_queue_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (opts.casing == CASE_SMART) { opts.casing = is_lowercase(opts.query) ? CASE_INSENSITIVE : CASE_SENSITIVE; } if (opts.literal) { if (opts.casing == CASE_INSENSITIVE) { /* Search routine needs the query to be lowercase */ char *c = opts.query; for (; *c != '\0'; ++c) { *c = (char)tolower(*c); } } generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE); find_skip_lookup = NULL; generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE); if (opts.word_regexp) { init_wordchar_table(); opts.literal_starts_wordchar = is_wordchar(opts.query[0]); opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); } } else { if (opts.casing == CASE_INSENSITIVE) { pcre_opts |= PCRE_CASELESS; } if (opts.word_regexp) { char *word_regexp_query; ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query); free(opts.query); opts.query = word_regexp_query; opts.query_len = strlen(opts.query); } compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); } if (opts.search_stream) { // search_stream(stdin, ""); } else { for (i = 0; i < workers_len; i++) { workers[i].id = i; int rv = pthread_create(&(workers[i].thread), NULL, &search_file_worker, &(workers[i].id)); if (rv != 0) { die("error in pthread_create(): %s", strerror(rv)); } #if defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(USE_CPU_SET) if (opts.use_thread_affinity) { cpu_set_t cpu_set; CPU_ZERO(&cpu_set); CPU_SET(i % num_cores, &cpu_set); rv = pthread_setaffinity_np(workers[i].thread, sizeof(cpu_set), &cpu_set); if (rv != 0) { die("error in pthread_setaffinity_np(): %s", strerror(rv)); } log_debug("Thread %i set to CPU %i", i, i); } else { log_debug("Thread affinity disabled."); } #else log_debug("No CPU affinity support."); #endif } for (i = 0; paths[i] != NULL; i++) { log_debug("searching path %s for %s", paths[i], opts.query); symhash = NULL; ignores *ig = init_ignore(root_ignores, "", 0); struct stat s; s.st_dev = 0; #ifndef _WIN32 /* The device is ignored if opts.one_dev is false, so it's fine * to leave it at the default 0 */ if (opts.one_dev && lstat(paths[i], &s) == -1) { log_err("Failed to get device information for path %s. Skipping...", paths[i]); } #endif search_dir(ig, base_paths[i], paths[i], 0, s.st_dev); cleanup_ignore(ig); } pthread_mutex_lock(&work_queue_mtx); done_adding_files = TRUE; pthread_cond_broadcast(&files_ready); pthread_mutex_unlock(&work_queue_mtx); for (i = 0; i < workers_len; i++) { if (pthread_join(workers[i].thread, NULL)) { die("pthread_join failed!"); } } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff /= 1000000; printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } if (opts.pager) { pclose(out_fd); } cleanup_options(); pthread_cond_destroy(&files_ready); pthread_mutex_destroy(&work_queue_mtx); pthread_mutex_destroy(&stats_mtx); pthread_mutex_destroy(&print_mtx); cleanup_ignore(root_ignores); free(workers); for (i = 0; paths[i] != NULL; i++) { free(paths[i]); free(base_paths[i]); } free(base_paths); free(paths); if (find_skip_lookup) { free(find_skip_lookup); } return !opts.match_found; }
int main(int argc, char **argv) { char **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; double time_diff; pthread_t *workers = NULL; int workers_len; set_log_level(LOG_LEVEL_WARN); work_queue = NULL; work_queue_tail = NULL; memset(&stats, 0, sizeof(stats)); root_ignores = init_ignore(NULL); out_fd = stdout; #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &paths); log_debug("PCRE Version: %s", pcre_version()); if (opts.workers) { workers_len = opts.workers; } else { /* Experiments show that two worker threads appear to be optimal, both * on dual-core and quad-core systems. See * http://geoff.greer.fm/2012/09/07/the-silver-searcher-adding-pthreads/. * On single-core CPUs, more than one worker thread makes no sense. */ int ncpus = (int)sysconf(_SC_NPROCESSORS_ONLN); workers_len = (ncpus >= 2) ? 2 : 1; } log_debug("Using %i workers", workers_len); done_adding_files = FALSE; workers = ag_calloc(workers_len, sizeof(pthread_t)); if (pthread_cond_init(&files_ready, NULL)) { log_err("pthread_cond_init failed!"); exit(2); } if (pthread_mutex_init(&print_mtx, NULL)) { log_err("pthread_mutex_init failed!"); exit(2); } if (pthread_mutex_init(&stats_mtx, NULL)) { log_err("pthread_mutex_init failed!"); exit(2); } if (pthread_mutex_init(&work_queue_mtx, NULL)) { log_err("pthread_mutex_init failed!"); exit(2); } if (opts.casing == CASE_SMART) { opts.casing = contains_uppercase(opts.query) ? CASE_SENSITIVE : CASE_INSENSITIVE; } if (opts.literal) { if (opts.casing == CASE_INSENSITIVE) { /* Search routine needs the query to be lowercase */ char *c = opts.query; for (; *c != '\0'; ++c) { *c = (char) tolower(*c); } } generate_skip_lookup(opts.query, opts.query_len, skip_lookup, opts.casing == CASE_SENSITIVE); if (opts.word_regexp) { init_wordchar_table(); opts.literal_starts_wordchar = is_wordchar(opts.query[0]); opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); } } else { if (opts.casing == CASE_INSENSITIVE) { pcre_opts = pcre_opts | PCRE_CASELESS; } if (opts.word_regexp) { char *word_regexp_query; asprintf(&word_regexp_query, "\\b%s\\b", opts.query); free(opts.query); opts.query = word_regexp_query; opts.query_len = strlen(opts.query); } compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); } if (opts.search_stream) { search_stream(stdin, ""); } else { for (i = 0; i < workers_len; i++) { int ptc_rc = pthread_create(&(workers[i]), NULL, &search_file_worker, NULL); check_err(ptc_rc, "create worker thread"); } for (i = 0; paths[i] != NULL; i++) { log_debug("searching path %s for %s", paths[i], opts.query); search_dir(root_ignores, paths[i], 0); } done_adding_files = TRUE; pthread_cond_broadcast(&files_ready); for (i = 0; i < workers_len; i++) { if (pthread_join(workers[i], NULL)) { log_err("pthread_join failed!"); exit(2); } } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff /= 1000000; printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } if (opts.pager) { pclose(out_fd); } pthread_cond_destroy(&files_ready); pthread_mutex_destroy(&work_queue_mtx); pthread_mutex_destroy(&stats_mtx); pthread_mutex_destroy(&print_mtx); cleanup_ignore(root_ignores); free(workers); free(paths); return 0; }
int main(int argc, char **argv) { char **base_paths = NULL; char **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; double time_diff; pthread_t *workers = NULL; int workers_len; set_log_level(LOG_LEVEL_WARN); work_queue = NULL; work_queue_tail = NULL; memset(&stats, 0, sizeof(stats)); root_ignores = init_ignore(NULL, "", 0); out_fd = stdout; #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &base_paths, &paths); log_debug("PCRE Version: %s", pcre_version()); #ifdef _WIN32 { SYSTEM_INFO si; GetSystemInfo(&si); workers_len = si.dwNumberOfProcessors; } #else workers_len = (int)sysconf(_SC_NPROCESSORS_ONLN); #endif if (opts.literal) { workers_len--; } if (opts.workers) { workers_len = opts.workers; } if (workers_len < 1) { workers_len = 1; } log_debug("Using %i workers", workers_len); done_adding_files = FALSE; workers = ag_calloc(workers_len, sizeof(pthread_t)); if (pthread_cond_init(&files_ready, NULL)) { die("pthread_cond_init failed!"); } if (pthread_mutex_init(&print_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (pthread_mutex_init(&stats_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (pthread_mutex_init(&work_queue_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (opts.casing == CASE_SMART) { opts.casing = is_lowercase(opts.query) ? CASE_INSENSITIVE : CASE_SENSITIVE; } if (opts.literal) { if (opts.casing == CASE_INSENSITIVE) { /* Search routine needs the query to be lowercase */ char *c = opts.query; for (; *c != '\0'; ++c) { *c = (char)tolower(*c); } } generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE); find_skip_lookup = NULL; generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE); if (opts.word_regexp) { init_wordchar_table(); opts.literal_starts_wordchar = is_wordchar(opts.query[0]); opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); } } else { if (opts.casing == CASE_INSENSITIVE) { pcre_opts |= PCRE_CASELESS; } if (opts.word_regexp) { char *word_regexp_query; ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query); free(opts.query); opts.query = word_regexp_query; opts.query_len = strlen(opts.query); } compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); } if (opts.search_stream) { search_stream(stdin, ""); } else { for (i = 0; i < workers_len; i++) { int rv = pthread_create(&(workers[i]), NULL, &search_file_worker, &i); if (rv != 0) { die("error in pthread_create(): %s", strerror(rv)); } } for (i = 0; paths[i] != NULL; i++) { log_debug("searching path %s for %s", paths[i], opts.query); symhash = NULL; ignores *ig = init_ignore(root_ignores, "", 0); search_dir(ig, base_paths[i], paths[i], 0); cleanup_ignore(ig); } pthread_mutex_lock(&work_queue_mtx); done_adding_files = TRUE; pthread_cond_broadcast(&files_ready); pthread_mutex_unlock(&work_queue_mtx); for (i = 0; i < workers_len; i++) { if (pthread_join(workers[i], NULL)) { die("pthread_join failed!"); } } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff /= 1000000; printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } if (opts.pager) { pclose(out_fd); } cleanup_options(); pthread_cond_destroy(&files_ready); pthread_mutex_destroy(&work_queue_mtx); pthread_mutex_destroy(&stats_mtx); pthread_mutex_destroy(&print_mtx); cleanup_ignore(root_ignores); free(workers); for (i = 0; paths[i] != NULL; i++) { free(paths[i]); free(base_paths[i]); } free(base_paths); free(paths); if (find_skip_lookup) { free(find_skip_lookup); } return !opts.match_found; }