/* TODO: Append matches to some data structure instead of just printing them out.
 * Then ag can have sweet summaries of matches/files scanned/time/etc.
 */

/*
 * Walk one directory.
 *
 * Loads the ignore files found under `path` into `ig`, lists the directory
 * through ag_scandir() with filename_filter, then for each entry either
 * appends it to the global work queue (non-directories) or recurses into it
 * with a child ignore set (directories, when opts.recurse_dirs is set and
 * depth is below opts.max_search_depth).
 *
 * ig        - ignore set for this directory; patterns are added to it here.
 * base_path - passed through unchanged to the scandir baton and to recursion.
 * path      - directory (or single file) to search.
 * depth     - current recursion depth; 0 for a path given on the command line.
 *
 * If `path` turns out not to be a directory (ENOTDIR) it is handed straight
 * to search_file().
 */
void search_dir(ignores *ig, const char *base_path, const char *path, const int depth) {
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    scandir_baton_t scandir_baton;
    int results = 0;
    char *dir_full_path = NULL;
    const char *ignore_file = NULL;
    int i;

    /* find agignore/gitignore/hgignore/etc files to load ignore patterns from.
     * With skip_vcs_ignores only the first entry of ignore_pattern_files is used. */
    for (i = 0; opts.skip_vcs_ignores ? (i == 0) : (ignore_pattern_files[i] != NULL); i++) {
        ignore_file = ignore_pattern_files[i];
        ag_asprintf(&dir_full_path, "%s/%s", path, ignore_file);
        if (strcmp(SVN_DIR, ignore_file) == 0) {
            load_svn_ignore_patterns(ig, dir_full_path);
        } else {
            load_ignore_patterns(ig, dir_full_path);
        }
        free(dir_full_path);
        dir_full_path = NULL;
    }

    if (opts.path_to_agignore) {
        load_ignore_patterns(ig, opts.path_to_agignore);
    }

    scandir_baton.ig = ig;
    scandir_baton.base_path = base_path;
    results = ag_scandir(path, &dir_list, &filename_filter, &scandir_baton);
    if (results == 0) {
        log_debug("No results found in directory %s", path);
        goto search_dir_cleanup;
    } else if (results == -1) {
        if (errno == ENOTDIR) {
            /* Not a directory. Probably a file. */
            /* If we're only searching one file, don't print the filename header at the top. */
            if (depth == 0 && opts.paths_len == 1) {
                opts.print_heading = -1;
            }
            search_file(path);
        } else {
            log_err("Error opening directory %s: %s", path, strerror(errno));
        }
        goto search_dir_cleanup;
    }

    int offset_vector[3];
    int rc = 0;
    work_queue_t *queue_item;

    for (i = 0; i < results; i++) {
        queue_item = NULL;
        dir = dir_list[i];
        ag_asprintf(&dir_full_path, "%s/%s", path, dir->d_name);

        /* If a link points to a directory then we need to treat it as a directory. */
        if (!opts.follow_symlinks && is_symlink(path, dir)) {
            log_debug("File %s ignored becaused it's a symlink", dir->d_name);
            goto cleanup;
        }

        if (!is_directory(path, dir)) {
            if (opts.file_search_regex) {
                rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                               0, 0, offset_vector, 3);
                if (rc < 0) { /* no match */
                    log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                    goto cleanup;
                } else if (opts.match_files) {
                    log_debug("match_files: file_search_regex matched for %s.", dir_full_path);
                    pthread_mutex_lock(&print_mtx);
                    print_path(dir_full_path, '\n');
                    pthread_mutex_unlock(&print_mtx);
                    goto cleanup;
                }
            }

            queue_item = ag_malloc(sizeof(*queue_item));
            queue_item->path = dir_full_path;
            queue_item->next = NULL;

            /* Log BEFORE publishing the item. Once it is on the queue the path
             * belongs to whoever dequeues it (this function no longer frees
             * it), so touching dir_full_path after the unlock could read
             * memory the consumer has already released. */
            log_debug("%s added to work queue", dir_full_path);

            pthread_mutex_lock(&work_queue_mtx);
            if (work_queue_tail == NULL) {
                work_queue = queue_item;
            } else {
                work_queue_tail->next = queue_item;
            }
            work_queue_tail = queue_item;
            pthread_mutex_unlock(&work_queue_mtx);
            pthread_cond_signal(&files_ready);
        } else if (opts.recurse_dirs) {
            if (depth < opts.max_search_depth) {
                log_debug("Searching dir %s", dir_full_path);
                ignores *child_ig = init_ignore(ig);
                search_dir(child_ig, base_path, dir_full_path, depth + 1);
                cleanup_ignore(child_ig);
            } else {
                log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
            }
        }

    cleanup:;
        free(dir);
        dir = NULL;
        /* The path is only ours to free if it was not handed to the queue. */
        if (queue_item == NULL) {
            free(dir_full_path);
        }
        dir_full_path = NULL;
    }

search_dir_cleanup:;
    free(dir_list);
    dir_list = NULL;
}
void parse_options(int argc, char **argv, char **paths[]) { int ch; int i; const char *pcre_err = NULL; int pcre_err_offset = 0; int path_len = 0; int useless = 0; int group = 1; int help = 0; int version = 0; int opt_index = 0; const char *home_dir = getenv("HOME"); char *ignore_file_path = NULL; init_options(); struct option longopts[] = { { "ackmate", no_argument, &(opts.ackmate), 1 }, { "ackmate-dir-filter", required_argument, NULL, 0 }, { "after", required_argument, NULL, 'A' }, { "all-types", no_argument, NULL, 'a' }, { "before", required_argument, NULL, 'B' }, { "break", no_argument, &(opts.print_break), 1 }, { "nobreak", no_argument, &(opts.print_break), 0 }, { "color", no_argument, &(opts.color), 1 }, { "nocolor", no_argument, &(opts.color), 0 }, { "column", no_argument, &(opts.column), 1 }, { "context", optional_argument, &(opts.context), 2 }, { "debug", no_argument, NULL, 'D' }, { "depth", required_argument, NULL, 0 }, { "follow", no_argument, &(opts.follow_symlinks), 1 }, { "file-search-regex", required_argument, NULL, 'G' }, { "group", no_argument, &(group), 1 }, { "nogroup", no_argument, &(group), 0 }, { "hidden", no_argument, &(opts.search_hidden_files), 1 }, { "invert-match", no_argument, &(opts.invert_match), 1 }, { "nofollow", no_argument, &(opts.follow_symlinks), 0 }, { "heading", no_argument, &(opts.print_heading), 1 }, { "noheading", no_argument, &(opts.print_heading), 0 }, { "no-recurse", no_argument, NULL, 'n' }, { "help", no_argument, NULL, 'h' }, { "ignore-case", no_argument, NULL, 'i' }, { "files-with-matches", no_argument, NULL, 'l' }, { "literal", no_argument, &(opts.literal), 1 }, { "match", no_argument, &useless, 0 }, { "max-count", required_argument, NULL, 'm' }, { "print-long-lines", no_argument, &(opts.print_long_lines), 1 }, { "search-binary", no_argument, &(opts.search_binary_files), 1 }, { "search-files", no_argument, &(opts.search_stream), 0 }, { "smart-case", no_argument, &useless, 0 }, { "nosmart-case", no_argument, &useless, 
0 }, { "stats", no_argument, &(opts.stats), 1 }, { "unrestricted", no_argument, NULL, 'u' }, { "version", no_argument, &version, 1 }, { NULL, 0, NULL, 0 } }; if (argc < 2) { usage(); exit(1); } /* stdin isn't a tty. something's probably being piped to ag */ if (!isatty(fileno(stdin))) { opts.search_stream = 1; } /* If we're not outputting to a terminal. change output to: * turn off colors * print filenames on every line */ if (!isatty(fileno(stdout))) { opts.color = 0; group = 0; } while ((ch = getopt_long(argc, argv, "A:aB:C:DG:fhilm:nvVu", longopts, &opt_index)) != -1) { switch (ch) { case 'A': opts.after = atoi(optarg); break; case 'a': opts.search_all_files = 1; opts.search_binary_files = 1; break; case 'B': opts.before = atoi(optarg); break; case 'C': opts.context = atoi(optarg); break; case 'D': set_log_level(LOG_LEVEL_DEBUG); break; case 'f': opts.follow_symlinks = 1; break; case 'G': opts.file_search_regex = pcre_compile(optarg, 0, &pcre_err, &pcre_err_offset, NULL); if (opts.file_search_regex == NULL) { log_err("pcre_compile of file-search-regex failed at position %i. Error: %s", pcre_err_offset, pcre_err); exit(1); } opts.file_search_regex_extra = pcre_study(opts.file_search_regex, 0, &pcre_err); if (opts.file_search_regex_extra == NULL) { log_err("pcre_study of file-search-regex failed. 
Error: %s", pcre_err); exit(1); } break; case 'h': help = 1; break; case 'i': opts.casing = CASE_INSENSITIVE; break; case 'l': opts.print_filename_only = 1; break; case 'm': opts.max_matches_per_file = atoi(optarg); break; case 'n': opts.recurse_dirs = 0; break; case 'u': opts.search_binary_files = 1; opts.search_unrestricted = 1; opts.search_hidden_files = 1; break; case 'v': opts.invert_match = 1; break; case 'V': version = 1; break; case 0: /* Long option */ if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) { opts.ackmate_dir_filter = pcre_compile(optarg, 0, &pcre_err, &pcre_err_offset, NULL); if (opts.ackmate_dir_filter == NULL) { log_err("pcre_compile of ackmate-dir-filter failed at position %i. Error: %s", pcre_err_offset, pcre_err); exit(1); } opts.ackmate_dir_filter_extra = pcre_study(opts.ackmate_dir_filter, 0, &pcre_err); if (opts.ackmate_dir_filter_extra == NULL) { log_err("pcre_study of ackmate-dir-filter failed. Error: %s", pcre_err); exit(1); } break; } else if (strcmp(longopts[opt_index].name, "depth") == 0) { opts.max_search_depth = atoi(optarg); break; } /* Continue to usage if we don't recognize the option */ if (longopts[opt_index].flag != 0) { break; } log_err("option %s does not take a value", longopts[opt_index].name); default: usage(); exit(1); } } argc -= optind; argv += optind; if (help) { usage(); exit(0); } if (version) { print_version(); exit(0); } if (argc == 0) { log_err("What do you want to search for?"); exit(1); } if (home_dir) { log_debug("Found user's home dir: %s", home_dir); size_t path_length = (size_t)(strlen(home_dir) + strlen(ignore_pattern_files[0])+2); ignore_file_path = malloc(path_length); strlcpy(ignore_file_path, home_dir, path_length); strlcat(ignore_file_path, "/", path_length); strlcat(ignore_file_path, ignore_pattern_files[0], path_length); load_ignore_patterns(ignore_file_path); free(ignore_file_path); } if (opts.context > 0) { opts.before = opts.context; opts.after = opts.context; } if 
(opts.ackmate) { opts.color = 0; opts.print_break = 1; group = 1; opts.search_stream = 0; } if (opts.print_heading == 0 || opts.print_break == 0) { goto skip_group; } if (group) { opts.print_heading = 1; opts.print_break = 1; } else { opts.print_heading = 0; opts.print_break = 0; } skip_group: if (opts.search_stream) { opts.print_break = 0; opts.print_heading = 0; opts.print_line_numbers = 0; } opts.query = strdup(argv[0]); opts.query_len = strlen(opts.query); log_debug("Query is %s", opts.query); if (opts.query_len == 0) { log_err("Error: No query. What do you want to search for?"); exit(1); } char *path = NULL; if (argc > 1) { *paths = malloc(sizeof(char*) * argc); for (i = 1; i < argc; i++) { path = strdup(argv[i]); path_len = strlen(path); /* kill trailing slash */ if (path_len > 0 && path[path_len - 1] == '/') { path[path_len - 1] = '\0'; } (*paths)[i-1] = path; } (*paths)[i-1] = NULL; } else { path = strdup("."); *paths = malloc(sizeof(char*) * 2); (*paths)[0] = path; (*paths)[1] = NULL; } }
/* TODO: append matches to some data structure instead of just printing them out * then there can be sweet summaries of matches/files scanned/time/etc */ int search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) { /* TODO: don't just die. also make max depth configurable */ if (depth > MAX_SEARCH_DEPTH) { log_err("Search depth greater than %i, giving up.", depth); exit(1); } struct dirent **dir_list = NULL; struct dirent *dir = NULL; int results = 0; int fd = -1; off_t f_len = 0; char *buf = NULL; int rv = 0; char *dir_full_path = NULL; size_t path_length = 0; int i; results = scandir(path, &dir_list, &ignorefile_filter, &alphasort); if (results > 0) { for (i = 0; i < results; i++) { dir = dir_list[i]; path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */ dir_full_path = malloc(path_length); dir_full_path = strncpy(dir_full_path, path, path_length); dir_full_path = strncat(dir_full_path, "/", path_length); dir_full_path = strncat(dir_full_path, dir->d_name, path_length); load_ignore_patterns(dir_full_path); free(dir); dir = NULL; free(dir_full_path); dir_full_path = NULL; } } free(dir_list); dir_list = NULL; results = scandir(path, &dir_list, &filename_filter, &alphasort); if (results == 0) { log_debug("No results found in directory %s", path); free(dir_list); dir_list = NULL; return(0); } else if (results == -1) { log_err("Error opening directory %s", path); return(0); } match matches[MAX_MATCHES_PER_FILE]; int matches_len = 0; int buf_len = 0; int buf_offset = 0; int offset_vector[MAX_MATCHES_PER_FILE * 3]; /* TODO */ int rc = 0; struct stat statbuf; int binary = 0; for (i=0; i<results; i++) { matches_len = 0; buf_offset = 0; binary = 0; dir = dir_list[i]; /* TODO: this is copy-pasted from about 30 lines above */ path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */ dir_full_path = malloc(path_length); dir_full_path = strncpy(dir_full_path, 
path, path_length); dir_full_path = strncat(dir_full_path, "/", path_length); dir_full_path = strncat(dir_full_path, dir->d_name, path_length); log_debug("dir %s type %i", dir_full_path, dir->d_type); /* TODO: scan files in current dir before going deeper */ if (dir->d_type == DT_DIR) { if (opts.recurse_dirs) { log_debug("Searching dir %s", dir_full_path); rv = search_dir(re, re_extra, dir_full_path, depth + 1); } goto cleanup; } if (opts.file_search_regex) { rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path), buf_offset, 0, offset_vector, 3); if (rc < 0) { /* no match */ log_debug("Skipping %s due to file_search_regex.", dir_full_path); goto cleanup; } } fd = open(dir_full_path, O_RDONLY); if (fd < 0) { log_err("Error opening file %s. Skipping...", dir_full_path); goto cleanup; } rv = fstat(fd, &statbuf); if (rv != 0) { log_err("Error fstat()ing file %s. Skipping...", dir_full_path); goto cleanup; } f_len = statbuf.st_size; if (f_len == 0) { log_debug("File %s is empty, skipping.", dir_full_path); goto cleanup; } else if (f_len > 1024 * 1024 * 1024) { /* 1 GB */ log_err("File %s is too big. Skipping...", dir_full_path); goto cleanup; } buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { log_err("File %s failed to load: %s.", dir_full_path, strerror(errno)); goto cleanup; } buf_len = f_len; if (is_binary((void*)buf, buf_len)) { /* Who needs duck typing when you have void cast? :) */ if (opts.search_binary_files) { binary = 1; } else { log_debug("File %s is binary. 
Skipping...", dir_full_path); goto cleanup; } } if (opts.literal) { char *match_ptr = buf; char *(*ag_strncmp_fp)(const char*, const char*, size_t) = &ag_strnstr; if (opts.casing == CASE_INSENSITIVE) { ag_strncmp_fp = &ag_strncasestr; } while (buf_offset < buf_len) { match_ptr = ag_strncmp_fp(match_ptr, opts.query, buf_len - buf_offset); if (match_ptr == NULL) { break; } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; matches_len++; match_ptr++; /* Don't segfault. TODO: realloc this array */ if (matches_len >= MAX_MATCHES_PER_FILE) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { /* In my profiling, most of the execution time is spent in this pcre_exec */ while (buf_offset < buf_len && (rc = pcre_exec(re, re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; /* Don't segfault. TODO: realloc this array */ if (matches_len >= MAX_MATCHES_PER_FILE) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } if (opts.stats) { total_file_count++; total_byte_count += buf_len; } if (rc == -1) { log_debug("No match in %s", dir_full_path); } if (matches_len > 0) { if (opts.print_filename_only) { print_path(dir_full_path); } else { if (binary) { printf("Binary file %s matches.\n", dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } } } cleanup: if (fd != -1) { munmap(buf, f_len); close(fd); fd = -1; } free(dir); dir = NULL; free(dir_full_path); dir_full_path = NULL; } free(dir_list); dir_list = NULL; return(0); }
/* TODO: append matches to some data structure instead of just printing them out * then there can be sweet summaries of matches/files scanned/time/etc */ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) { struct dirent **dir_list = NULL; struct dirent *dir = NULL; int results = 0; int fd = -1; off_t f_len = 0; char *buf = NULL; char *dir_full_path = NULL; size_t path_length = 0; int i; /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */ #ifdef AG_OS_BSD results = scandir(path, &dir_list, &ignorefile_filter, &alphasort); #else results = scandir(path, &dir_list, (int (*)(const struct dirent *))&ignorefile_filter, &alphasort); #endif if (results > 0) { for (i = 0; i < results; i++) { dir = dir_list[i]; path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */ dir_full_path = malloc(path_length); strlcpy(dir_full_path, path, path_length); strlcat(dir_full_path, "/", path_length); strlcat(dir_full_path, dir->d_name, path_length); load_ignore_patterns(dir_full_path); free(dir); dir = NULL; free(dir_full_path); dir_full_path = NULL; } } free(dir_list); dir_list = NULL; #ifdef AG_OS_BSD results = scandir(path, &dir_list, &filename_filter, &alphasort); #else results = scandir(path, &dir_list, (int (*)(const struct dirent *))&filename_filter, &alphasort); #endif if (results == 0) { log_debug("No results found in directory %s", path); free(dir_list); dir_list = NULL; return; } else if (results == -1) { if (errno == ENOTDIR) { /* Not a directory. Probably a file. */ /* If we're only searching one file, don't print the filename header at the top. */ opts.print_heading = depth == 0 ? 
-1 : opts.print_heading; search_file(re, re_extra, path); return; } else { log_err("Error opening directory %s: %s", path, strerror(errno)); return; } } int offset_vector[3]; int rc = 0; struct stat stDirInfo; for (i = 0; i < results; i++) { dir = dir_list[i]; /* TODO: this is copy-pasted from about 30 lines above */ path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */ dir_full_path = malloc(path_length); strlcpy(dir_full_path, path, path_length); strlcat(dir_full_path, "/", path_length); strlcat(dir_full_path, dir->d_name, path_length); /* Some filesystems, e.g. ReiserFS, always return a type DT_UNKNOWN from readdir or scandir. */ /* Call lstat if we find DT_UNKNOWN to get the information we need. */ if (dir->d_type == DT_UNKNOWN) { if (lstat(dir_full_path, &stDirInfo) != -1) { if (S_ISDIR(stDirInfo.st_mode)) { dir->d_type = DT_DIR; } else if (S_ISLNK(stDirInfo.st_mode)) { dir->d_type = DT_LNK; } } else { log_err("lstat() failed on %s", dir_full_path); /* If lstat fails we may as well carry on and hope for the best. */ } if (!opts.follow_symlinks && dir->d_type == DT_LNK) { log_debug("File %s ignored becaused it's a symlink", dir->d_name); goto cleanup; } } /* If a link points to a directory then we need to treat it as a directory. */ if (dir->d_type == DT_LNK) { if (stat(dir_full_path, &stDirInfo) != -1) { if (S_ISDIR(stDirInfo.st_mode)) { dir->d_type = DT_DIR; } } else { log_err("stat() failed on %s", dir_full_path); /* If stat fails we may as well carry on and hope for the best. 
*/ } } log_debug("dir %s type %i", dir_full_path, dir->d_type); if (opts.file_search_regex) { rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path), 0, 0, offset_vector, 3); if (rc < 0) { /* no match */ log_debug("Skipping %s due to file_search_regex.", dir_full_path); goto cleanup; } } /* TODO: scan files in current dir before going deeper */ if (dir->d_type == DT_DIR) { if (opts.recurse_dirs) { if (depth < opts.max_search_depth) { log_debug("Searching dir %s", dir_full_path); search_dir(re, re_extra, dir_full_path, depth + 1); } else { log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path); } } goto cleanup; } search_file(re, re_extra, dir_full_path); cleanup: if (fd != -1) { munmap(buf, f_len); close(fd); fd = -1; } free(dir); dir = NULL; free(dir_full_path); dir_full_path = NULL; } free(dir_list); dir_list = NULL; return; }
/* TODO: append matches to some data structure instead of just printing them out * then there can be sweet summaries of matches/files scanned/time/etc */ void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) { struct dirent **dir_list = NULL; struct dirent *dir = NULL; int results = 0; int fd = -1; off_t f_len = 0; char *buf = NULL; char *dir_full_path = NULL; size_t path_length = 0; int i; /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */ #ifdef AG_OS_BSD results = scandir(path, &dir_list, &ignorefile_filter, &alphasort); #else results = scandir(path, &dir_list, (int (*)(const struct dirent *))&ignorefile_filter, &alphasort); #endif if (results > 0) { for (i = 0; i < results; i++) { dir = dir_list[i]; path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */ dir_full_path = malloc(path_length); strlcpy(dir_full_path, path, path_length); strlcat(dir_full_path, "/", path_length); strlcat(dir_full_path, dir->d_name, path_length); load_ignore_patterns(dir_full_path); free(dir); dir = NULL; free(dir_full_path); dir_full_path = NULL; } } free(dir_list); dir_list = NULL; #ifdef AG_OS_BSD results = scandir(path, &dir_list, &filename_filter, &alphasort); #else results = scandir(path, &dir_list, (int (*)(const struct dirent *))&filename_filter, &alphasort); #endif if (results == 0) { log_debug("No results found in directory %s", path); free(dir_list); dir_list = NULL; return; } else if (results == -1) { if (errno == ENOTDIR) { /* Not a directory. Probably a file. */ /* If we're only searching one file, don't print the filename header at the top. */ opts.print_heading = depth == 0 ? 
-1 : opts.print_heading; search_file(re, re_extra, path); return; } else { log_err("Error opening directory %s: %s", path, strerror(errno)); return; } } int offset_vector[3]; int rc = 0; for (i=0; i<results; i++) { dir = dir_list[i]; /* TODO: this is copy-pasted from about 30 lines above */ path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */ dir_full_path = malloc(path_length); strlcpy(dir_full_path, path, path_length); strlcat(dir_full_path, "/", path_length); strlcat(dir_full_path, dir->d_name, path_length); log_debug("dir %s type %i", dir_full_path, dir->d_type); if (opts.file_search_regex) { rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path), 0, 0, offset_vector, 3); if (rc < 0) { /* no match */ log_debug("Skipping %s due to file_search_regex.", dir_full_path); goto cleanup; } } /* TODO: scan files in current dir before going deeper */ if (dir->d_type == DT_DIR) { if (opts.recurse_dirs) { if (depth < opts.max_search_depth) { log_debug("Searching dir %s", dir_full_path); search_dir(re, re_extra, dir_full_path, depth + 1); } else { log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path); } } goto cleanup; } search_file(re, re_extra, dir_full_path); cleanup: if (fd != -1) { munmap(buf, f_len); close(fd); fd = -1; } free(dir); dir = NULL; free(dir_full_path); dir_full_path = NULL; } free(dir_list); dir_list = NULL; return; }
//TODO: append matches to some data structure instead of just printing them out // then there can be sweet summaries of matches/files scanned/time/etc int search_dir(pcre *re, const char* path, const int depth) { //TODO: don't just die. also make max depth configurable if(depth > MAX_SEARCH_DEPTH) { log_err("Search depth greater than %i, giving up.", depth); exit(1); } struct dirent **dir_list = NULL; struct dirent *dir = NULL; int results = 0; FILE *fp = NULL; int f_len; size_t r_len; char *buf = NULL; int rv = 0; char *dir_full_path = NULL; size_t path_length = 0; results = scandir(path, &dir_list, &ignorefile_filter, &alphasort); if (results > 0) { for (int i = 0; i < results; i++) { dir = dir_list[i]; path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); // 2 for slash and null char dir_full_path = malloc(path_length); dir_full_path = strncpy(dir_full_path, path, path_length); dir_full_path = strncat(dir_full_path, "/", path_length); dir_full_path = strncat(dir_full_path, dir->d_name, path_length); load_ignore_patterns(dir_full_path); free(dir); free(dir_full_path); } } free(dir_list); results = scandir(path, &dir_list, &filename_filter, &alphasort); if (results == 0) { log_debug("No results found"); free(dir_list); return(0); } else if (results == -1) { log_err("Error opening directory %s", path); return(0); } for (int i=0; i<results; i++) { dir = dir_list[i]; // XXX: this is copy-pasted from about 30 lines above path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); // 2 for slash and null char dir_full_path = malloc(path_length); dir_full_path = strncpy(dir_full_path, path, path_length); dir_full_path = strncat(dir_full_path, "/", path_length); dir_full_path = strncat(dir_full_path, dir->d_name, path_length); log_debug("dir %s type %i", dir_full_path, dir->d_type); //TODO: scan files in current dir before going deeper if (dir->d_type == DT_DIR && opts.recurse_dirs) { log_debug("searching dir %s", dir_full_path); rv = search_dir(re, 
dir_full_path, depth + 1); goto cleanup; continue; } fp = fopen(dir_full_path, "r"); if (fp == NULL) { log_err("Error opening file %s. Skipping...", dir_full_path); goto cleanup; continue; } rv = fseek(fp, 0, SEEK_END); if (rv != 0) { log_err("Error fseek()ing file %s. Skipping...", dir_full_path); goto cleanup; } f_len = ftell(fp); //TODO: behave differently if file is HUGE. on 32 bit, anything > 2GB will screw up this program if (f_len == 0) { log_debug("file is empty. skipping"); goto cleanup; } rewind(fp); buf = (char*) malloc(sizeof(char) * f_len + 1); r_len = fread(buf, 1, f_len, fp); buf[r_len] = '\0'; int buf_len = (int)r_len; int buf_offset = 0; int offset_vector[MAX_MATCHES_PER_FILE * 2]; //XXXX int rc = 0; char *match_start = NULL; char *match_end = NULL; int first_match = 1; // In my profiling, most of the execution time is spent in this pcre_exec while(buf_offset < buf_len && (rc = pcre_exec(re, NULL, buf, r_len, buf_offset, 0, offset_vector, sizeof(offset_vector))) >= 0 ) { log_debug("Match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); match_start = buf + offset_vector[0]; match_end = buf + offset_vector[1]; buf_offset = offset_vector[1]; print_match(dir_full_path, buf, match_start, match_end, first_match); first_match = 0; } free(buf); cleanup: if (fp != NULL) { fclose(fp); } free(dir); free(dir_full_path); } free(dir_list); return(0); }
void parse_options(int argc, char **argv, char **paths[]) { int ch; int i; int path_len = 0; int useless = 0; int group = 1; int help = 0; int version = 0; int opt_index = 0; const char *home_dir = getenv("HOME"); char *ignore_file_path = NULL; int needs_query = 1; init_options(); struct option longopts[] = { { "ackmate", no_argument, &(opts.ackmate), 1 }, { "ackmate-dir-filter", required_argument, NULL, 0 }, { "after", required_argument, NULL, 'A' }, { "all-text", no_argument, NULL, 't' }, { "all-types", no_argument, NULL, 'a' }, { "before", required_argument, NULL, 'B' }, { "break", no_argument, &(opts.print_break), 1 }, { "case-sensitive", no_argument, NULL, 's' }, { "color", no_argument, &(opts.color), 1 }, { "column", no_argument, &(opts.column), 1 }, { "context", optional_argument, NULL, 'C' }, { "debug", no_argument, NULL, 'D' }, { "depth", required_argument, NULL, 0 }, { "file-search-regex", required_argument, NULL, 'G' }, { "files-with-matches", no_argument, NULL, 'l' }, { "files-without-matches", no_argument, NULL, 'L' }, { "follow", no_argument, &(opts.follow_symlinks), 1 }, { "group", no_argument, &(group), 1 }, { "heading", no_argument, &(opts.print_heading), 1 }, { "help", no_argument, NULL, 'h' }, { "hidden", no_argument, &(opts.search_hidden_files), 1 }, { "ignore", required_argument, NULL, 0 }, { "ignore-case", no_argument, NULL, 'i' }, { "invert-match", no_argument, &(opts.invert_match), 1 }, { "literal", no_argument, NULL, 'Q' }, { "match", no_argument, &useless, 0 }, { "max-count", required_argument, NULL, 'm' }, { "no-recurse", no_argument, NULL, 'n' }, { "nobreak", no_argument, &(opts.print_break), 0 }, { "nocolor", no_argument, &(opts.color), 0 }, { "nofollow", no_argument, &(opts.follow_symlinks), 0 }, { "nogroup", no_argument, &(group), 0 }, { "noheading", no_argument, &(opts.print_heading), 0 }, { "parallel", no_argument, &(opts.parallel), 1}, { "path-to-agignore", required_argument, NULL, 'p'}, { "print-long-lines", no_argument, 
&(opts.print_long_lines), 1 }, { "recurse", no_argument, NULL, 'r' }, { "search-binary", no_argument, &(opts.search_binary_files), 1 }, { "search-files", no_argument, &(opts.search_stream), 0 }, { "skip-vcs-ignores", no_argument, NULL, 'U' }, { "smart-case", no_argument, NULL, 'S' }, { "stats", no_argument, &(opts.stats), 1 }, { "unrestricted", no_argument, NULL, 'u' }, { "version", no_argument, &version, 1 }, { "word-regexp", no_argument, NULL, 'w' }, { "workers", required_argument, NULL, 0 }, { NULL, 0, NULL, 0 } }; if (argc < 2) { usage(); exit(1); } /* stdin isn't a tty. something's probably being piped to ag */ if (!isatty(fileno(stdin))) { opts.search_stream = 1; } /* If we're not outputting to a terminal. change output to: * turn off colors * print filenames on every line */ if (!isatty(fileno(stdout))) { opts.color = 0; group = 0; } while ((ch = getopt_long(argc, argv, "A:aB:C:DG:g:fhiLlm:np:QRrSsvVtuUw", longopts, &opt_index)) != -1) { switch (ch) { case 'A': opts.after = atoi(optarg); break; case 'a': opts.search_all_files = 1; opts.search_binary_files = 1; break; case 'B': opts.before = atoi(optarg); break; case 'C': if (optarg) { opts.context = atoi(optarg); if (opts.context == 0 && errno == EINVAL) { /* This arg must be the search string instead of the context length */ optind--; opts.context = DEFAULT_CONTEXT_LEN; } } else { opts.context = DEFAULT_CONTEXT_LEN; } break; case 'D': set_log_level(LOG_LEVEL_DEBUG); break; case 'f': opts.follow_symlinks = 1; break; case 'g': needs_query = 0; opts.match_files = 1; /* Fall through and build regex */ case 'G': compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, optarg, 0, 0); break; case 'h': help = 1; break; case 'i': opts.casing = CASE_INSENSITIVE; break; case 'L': opts.invert_match = 1; /* fall through */ case 'l': opts.print_filename_only = 1; break; case 'm': opts.max_matches_per_file = atoi(optarg); break; case 'n': opts.recurse_dirs = 0; break; case 'p': opts.path_to_agignore = optarg; 
break; case 'Q': opts.literal = 1; break; case 'R': case 'r': opts.recurse_dirs = 1; break; case 'S': opts.casing = CASE_SMART; break; case 's': opts.casing = CASE_SENSITIVE; break; case 't': opts.search_all_files = 1; break; case 'u': opts.search_binary_files = 1; opts.search_all_files = 1; opts.search_hidden_files = 1; break; case 'U': opts.skip_vcs_ignores = 1; break; case 'v': opts.invert_match = 1; break; case 'V': version = 1; break; case 'w': opts.word_regexp = 1; break; case 0: /* Long option */ if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) { compile_study(&opts.ackmate_dir_filter, &opts.ackmate_dir_filter_extra, optarg, 0, 0); break; } else if (strcmp(longopts[opt_index].name, "depth") == 0) { opts.max_search_depth = atoi(optarg); break; } else if (strcmp(longopts[opt_index].name, "ignore") == 0) { add_ignore_pattern(root_ignores, optarg); break; } else if (strcmp(longopts[opt_index].name, "workers") == 0) { opts.workers = atoi(optarg); break; } /* Continue to usage if we don't recognize the option */ if (longopts[opt_index].flag != 0) { break; } log_err("option %s does not take a value", longopts[opt_index].name); default: usage(); exit(1); } } argc -= optind; argv += optind; if (help) { usage(); exit(0); } if (version) { print_version(); exit(0); } if (needs_query && argc == 0) { log_err("What do you want to search for?"); exit(1); } if (home_dir && !opts.search_all_files) { log_debug("Found user's home dir: %s", home_dir); asprintf(&ignore_file_path, "%s/%s", home_dir, ignore_pattern_files[0]); load_ignore_patterns(root_ignores, ignore_file_path); free(ignore_file_path); } if (opts.context > 0) { opts.before = opts.context; opts.after = opts.context; } if (opts.ackmate) { opts.color = 0; opts.print_break = 1; group = 1; opts.search_stream = 0; } if (opts.parallel) { opts.search_stream = 0; } if (opts.print_heading == 0 || opts.print_break == 0) { goto skip_group; } if (group) { opts.print_heading = 1; opts.print_break = 1; } else { 
opts.print_heading = 0; opts.print_break = 0; } skip_group:; if (opts.search_stream) { opts.print_break = 0; opts.print_heading = 0; opts.print_line_numbers = 0; } if (needs_query) { opts.query = strdup(argv[0]); argc--; argv++; } else { opts.query = strdup("."); } opts.query_len = strlen(opts.query); log_debug("Query is %s", opts.query); if (opts.query_len == 0) { log_err("Error: No query. What do you want to search for?"); exit(1); } if (!is_regex(opts.query)) { opts.literal = 1; } char *path = NULL; opts.paths_len = argc; if (argc > 0) { *paths = calloc(sizeof(char*), argc + 1); for (i = 0; i < argc; i++) { path = strdup(argv[i]); path_len = strlen(path); /* kill trailing slash */ if (path_len > 1 && path[path_len - 1] == '/') { path[path_len - 1] = '\0'; } (*paths)[i] = path; } (*paths)[i] = NULL; /* Make sure we search these paths instead of stdin. */ opts.search_stream = 0; } else { path = strdup("."); *paths = malloc(sizeof(char*) * 2); (*paths)[0] = path; (*paths)[1] = NULL; } }