Example #1
0
/*
 * Scan one directory: load its ignore files, queue every non-directory entry
 * for searching and recurse into subdirectories.
 *
 *   ig        - ignore set that receives the patterns loaded for this directory.
 *   base_path - passed through to the scandir filter and to recursive calls.
 *   path      - directory to scan; if scanning fails with ENOTDIR, path is
 *               handed to search_file() instead.
 *   depth     - current recursion depth, compared against opts.max_search_depth.
 *
 * Every dirent returned by ag_scandir() is freed here. A path string that was
 * attached to a work-queue item is NOT freed here; all other path strings are.
 *
 * TODO: Append matches to some data structure instead of just printing them out.
 * Then ag can have sweet summaries of matches/files scanned/time/etc.
 */
void search_dir(ignores *ig, const char *base_path, const char *path, const int depth) {
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    scandir_baton_t scandir_baton;
    int results = 0;

    char *dir_full_path = NULL;
    const char *ignore_file = NULL;
    int i;

    /* find agignore/gitignore/hgignore/etc files to load ignore patterns from.
     * With opts.skip_vcs_ignores set, only the first entry of
     * ignore_pattern_files is consulted. */
    for (i = 0; opts.skip_vcs_ignores ? (i == 0) : (ignore_pattern_files[i] != NULL); i++) {
        ignore_file = ignore_pattern_files[i];
        ag_asprintf(&dir_full_path, "%s/%s", path, ignore_file);
        if (strcmp(SVN_DIR, ignore_file) == 0) {
            load_svn_ignore_patterns(ig, dir_full_path);
        } else {
            load_ignore_patterns(ig, dir_full_path);
        }
        free(dir_full_path);
        dir_full_path = NULL;
    }

    if (opts.path_to_agignore) {
        load_ignore_patterns(ig, opts.path_to_agignore);
    }

    scandir_baton.ig = ig;
    scandir_baton.base_path = base_path;
    results = ag_scandir(path, &dir_list, &filename_filter, &scandir_baton);
    if (results == 0) {
        log_debug("No results found in directory %s", path);
        goto search_dir_cleanup;
    } else if (results == -1) {
        if (errno == ENOTDIR) {
            /* Not a directory. Probably a file. */
            /* If we're only searching one file, don't print the filename header at the top. */
            if (depth == 0 && opts.paths_len == 1) {
                opts.print_heading = -1;
            }
            search_file(path);
        } else {
            log_err("Error opening directory %s: %s", path, strerror(errno));
        }
        goto search_dir_cleanup;
    }

    int offset_vector[3];
    int rc = 0;
    work_queue_t *queue_item;

    for (i = 0; i < results; i++) {
        queue_item = NULL;
        dir = dir_list[i];
        ag_asprintf(&dir_full_path, "%s/%s", path, dir->d_name);

        /* Symlinks are skipped entirely unless the user asked to follow them. */
        if (!opts.follow_symlinks && is_symlink(path, dir)) {
            log_debug("File %s ignored becaused it's a symlink", dir->d_name);
            goto cleanup;
        }

        if (!is_directory(path, dir)) {
            if (opts.file_search_regex) {
                rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                               0, 0, offset_vector, 3);
                if (rc < 0) { /* no match */
                    log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                    goto cleanup;
                } else if (opts.match_files) {
                    log_debug("match_files: file_search_regex matched for %s.", dir_full_path);
                    pthread_mutex_lock(&print_mtx);
                    print_path(dir_full_path, '\n');
                    pthread_mutex_unlock(&print_mtx);
                    goto cleanup;
                }
            }

            queue_item = ag_malloc(sizeof(work_queue_t));
            queue_item->path = dir_full_path;
            queue_item->next = NULL;

            /* Log BEFORE publishing the item. Once it is on the queue and the
             * mutex is released, this thread no longer controls the lifetime
             * of dir_full_path (the queue consumer is not visible here and may
             * release the string at any time), so it must not be read again. */
            log_debug("%s added to work queue", dir_full_path);

            pthread_mutex_lock(&work_queue_mtx);
            if (work_queue_tail == NULL) {
                work_queue = queue_item;
            } else {
                work_queue_tail->next = queue_item;
            }
            work_queue_tail = queue_item;
            pthread_mutex_unlock(&work_queue_mtx);
            pthread_cond_signal(&files_ready);
        } else if (opts.recurse_dirs) {
            if (depth < opts.max_search_depth) {
                log_debug("Searching dir %s", dir_full_path);
                /* Each subdirectory gets its own ignore set derived from ig. */
                ignores *child_ig = init_ignore(ig);
                search_dir(child_ig, base_path, dir_full_path, depth + 1);
                cleanup_ignore(child_ig);
            } else {
                log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
            }
        }

cleanup:
        ;
        free(dir);
        dir = NULL;
        /* A queued item keeps its path string; free it only when nothing was queued. */
        if (queue_item == NULL) {
            free(dir_full_path);
        }
        dir_full_path = NULL;
    }

search_dir_cleanup:
    ;
    free(dir_list);
    dir_list = NULL;
}
Example #2
0
/* Abort with an error message when an allocation made while parsing options failed. */
static void parse_options_require_alloc(const void *ptr) {
    if (ptr == NULL) {
        log_err("Out of memory while parsing options");
        exit(1);
    }
}

/*
 * Parse the command line into the global `opts` and build the list of paths
 * to search.
 *
 *   argc, argv - the program's argument vector, as passed to main().
 *   paths      - out parameter; receives a newly allocated, NULL-terminated
 *                array of newly allocated path strings (trailing '/' removed).
 *                Defaults to a single "." when no path is given.
 *
 * The first non-option argument becomes opts.query. The function calls
 * exit() on usage errors, on --help / --version, and on invalid regexes.
 */
void parse_options(int argc, char **argv, char **paths[]) {
    int ch;
    int i;
    const char *pcre_err = NULL;
    int pcre_err_offset = 0;
    size_t path_len = 0;
    int useless = 0;
    int group = 1;
    int help = 0;
    int version = 0;
    int opt_index = 0;
    const char *home_dir = getenv("HOME");
    char *ignore_file_path = NULL;

    init_options();

    struct option longopts[] = {
        { "ackmate", no_argument, &(opts.ackmate), 1 },
        { "ackmate-dir-filter", required_argument, NULL, 0 },
        { "after", required_argument, NULL, 'A' },
        { "all-types", no_argument, NULL, 'a' },
        { "before", required_argument, NULL, 'B' },
        { "break", no_argument, &(opts.print_break), 1 },
        { "nobreak", no_argument, &(opts.print_break), 0 },
        { "color", no_argument, &(opts.color), 1 },
        { "nocolor", no_argument, &(opts.color), 0 },
        { "column", no_argument, &(opts.column), 1 },
        { "context", optional_argument, &(opts.context), 2 },
        { "debug", no_argument, NULL, 'D' },
        { "depth", required_argument, NULL, 0 },
        { "follow", no_argument, &(opts.follow_symlinks), 1 },
        { "file-search-regex", required_argument, NULL, 'G' },
        { "group", no_argument, &(group), 1 },
        { "nogroup", no_argument, &(group), 0 },
        { "hidden", no_argument, &(opts.search_hidden_files), 1 },
        { "invert-match", no_argument, &(opts.invert_match), 1 },
        { "nofollow", no_argument, &(opts.follow_symlinks), 0 },
        { "heading", no_argument, &(opts.print_heading), 1 },
        { "noheading", no_argument, &(opts.print_heading), 0 },
        { "no-recurse", no_argument, NULL, 'n' },
        { "help", no_argument, NULL, 'h' },
        { "ignore-case", no_argument, NULL, 'i' },
        { "files-with-matches", no_argument, NULL, 'l' },
        { "literal", no_argument, &(opts.literal), 1 },
        { "match", no_argument, &useless, 0 },
        { "max-count", required_argument, NULL, 'm' },
        { "print-long-lines", no_argument, &(opts.print_long_lines), 1 },
        { "search-binary", no_argument, &(opts.search_binary_files), 1 },
        { "search-files", no_argument, &(opts.search_stream), 0 },
        { "smart-case", no_argument, &useless, 0 },
        { "nosmart-case", no_argument, &useless, 0 },
        { "stats", no_argument, &(opts.stats), 1 },
        { "unrestricted", no_argument, NULL, 'u' },
        { "version", no_argument, &version, 1 },
        { NULL, 0, NULL, 0 }
    };

    if (argc < 2) {
        usage();
        exit(1);
    }

    /* stdin isn't a tty. something's probably being piped to ag */
    if (!isatty(fileno(stdin))) {
        opts.search_stream = 1;
    }

    /* If we're not outputting to a terminal. change output to:
        * turn off colors
        * print filenames on every line
     */
    if (!isatty(fileno(stdout))) {
        opts.color = 0;
        group = 0;
    }

    while ((ch = getopt_long(argc, argv, "A:aB:C:DG:fhilm:nvVu", longopts, &opt_index)) != -1) {
        switch (ch) {
            case 'A':
                opts.after = atoi(optarg);
                break;
            case 'a':
                opts.search_all_files = 1;
                opts.search_binary_files = 1;
                break;
            case 'B':
                opts.before = atoi(optarg);
                break;
            case 'C':
                opts.context = atoi(optarg);
                break;
            case 'D':
                set_log_level(LOG_LEVEL_DEBUG);
                break;
            case 'f':
                opts.follow_symlinks = 1;
                break;
            case 'G':
                opts.file_search_regex = pcre_compile(optarg, 0, &pcre_err, &pcre_err_offset, NULL);
                if (opts.file_search_regex == NULL) {
                  log_err("pcre_compile of file-search-regex failed at position %i. Error: %s", pcre_err_offset, pcre_err);
                  exit(1);
                }

                /* pcre_study() returns NULL both on failure and when it simply
                 * has no extra data for the pattern; only a non-NULL error
                 * message indicates a real failure. */
                opts.file_search_regex_extra = pcre_study(opts.file_search_regex, 0, &pcre_err);
                if (pcre_err != NULL) {
                  log_err("pcre_study of file-search-regex failed. Error: %s", pcre_err);
                  exit(1);
                }
                break;
            case 'h':
                help = 1;
                break;
            case 'i':
                opts.casing = CASE_INSENSITIVE;
                break;
            case 'l':
                opts.print_filename_only = 1;
                break;
            case 'm':
                opts.max_matches_per_file = atoi(optarg);
                break;
            case 'n':
                opts.recurse_dirs = 0;
                break;
            case 'u':
                opts.search_binary_files = 1;
                opts.search_unrestricted = 1;
                opts.search_hidden_files = 1;
                break;
            case 'v':
                opts.invert_match = 1;
                break;
            case 'V':
                version = 1;
                break;
            case 0: /* Long option */
                if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) {
                    opts.ackmate_dir_filter = pcre_compile(optarg, 0, &pcre_err, &pcre_err_offset, NULL);
                    if (opts.ackmate_dir_filter == NULL) {
                        log_err("pcre_compile of ackmate-dir-filter failed at position %i. Error: %s", pcre_err_offset, pcre_err);
                        exit(1);
                    }
                    /* See the note on pcre_study() under case 'G'. */
                    opts.ackmate_dir_filter_extra = pcre_study(opts.ackmate_dir_filter, 0, &pcre_err);
                    if (pcre_err != NULL) {
                      log_err("pcre_study of ackmate-dir-filter failed. Error: %s", pcre_err);
                      exit(1);
                    }
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "depth") == 0) {
                    opts.max_search_depth = atoi(optarg);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "context") == 0) {
                    /* getopt_long() has already stored the default (2) through
                     * the flag pointer; an explicit --context=N overrides it. */
                    if (optarg != NULL) {
                        opts.context = atoi(optarg);
                    }
                    break;
                }
                /* Flag-style long options were already applied by getopt_long(). */
                if (longopts[opt_index].flag != 0) {
                    break;
                }
                /* Continue to usage if we don't recognize the option */
                log_err("option %s does not take a value", longopts[opt_index].name);
                /* fall through */
            default:
                usage();
                exit(1);
        }
    }

    argc -= optind;
    argv += optind;

    if (help) {
        usage();
        exit(0);
    }

    if (version) {
        print_version();
        exit(0);
    }

    if (argc == 0) {
        log_err("What do you want to search for?");
        exit(1);
    }

    /* Load the user-level ignore file: $HOME/<ignore_pattern_files[0]>. */
    if (home_dir) {
        log_debug("Found user's home dir: %s", home_dir);
        /* +2: one byte for the '/' separator, one for the terminating NUL. */
        size_t path_length = (size_t)(strlen(home_dir) + strlen(ignore_pattern_files[0])+2);
        ignore_file_path = malloc(path_length);
        parse_options_require_alloc(ignore_file_path);
        strlcpy(ignore_file_path, home_dir, path_length);
        strlcat(ignore_file_path, "/", path_length);
        strlcat(ignore_file_path, ignore_pattern_files[0], path_length);

        load_ignore_patterns(ignore_file_path);

        free(ignore_file_path);
        ignore_file_path = NULL;
    }

    if (opts.context > 0) {
        opts.before = opts.context;
        opts.after = opts.context;
    }

    if (opts.ackmate) {
        opts.color = 0;
        opts.print_break = 1;
        group = 1;
        opts.search_stream = 0;
    }

    /* --group / --nogroup only decides heading and break when neither of them
     * has been switched off. */
    if (opts.print_heading != 0 && opts.print_break != 0) {
        if (group) {
            opts.print_heading = 1;
            opts.print_break = 1;
        }
        else {
            opts.print_heading = 0;
            opts.print_break = 0;
        }
    }

    if (opts.search_stream) {
        opts.print_break = 0;
        opts.print_heading = 0;
        opts.print_line_numbers = 0;
    }

    opts.query = strdup(argv[0]);
    parse_options_require_alloc(opts.query);
    opts.query_len = strlen(opts.query);

    log_debug("Query is %s", opts.query);

    if (opts.query_len == 0) {
        log_err("Error: No query. What do you want to search for?");
        exit(1);
    }

    char *path = NULL;
    if (argc > 1) {
        /* argv[0] is the query, so argc - 1 paths plus the NULL terminator. */
        *paths = malloc(sizeof(char*) * argc);
        parse_options_require_alloc(*paths);
        for (i = 1; i < argc; i++) {
            path = strdup(argv[i]);
            parse_options_require_alloc(path);
            path_len = strlen(path);
            /* kill trailing slash */
            if (path_len > 0 && path[path_len - 1] == '/') {
              path[path_len - 1] = '\0';
            }
            (*paths)[i-1] = path;
        }
        (*paths)[i-1] = NULL;
    }
    else {
        path = strdup(".");
        parse_options_require_alloc(path);
        *paths = malloc(sizeof(char*) * 2);
        parse_options_require_alloc(*paths);
        (*paths)[0] = path;
        (*paths)[1] = NULL;
    }
}
Example #3
0
/*
 * Search every entry of directory `path` for the query, recursing into
 * subdirectories when opts.recurse_dirs is set.
 *
 *   re, re_extra - compiled query regex and its study data, used when
 *                  opts.literal is not set.
 *   path         - directory to scan.
 *   depth        - current recursion depth; exceeding MAX_SEARCH_DEPTH calls
 *                  exit(1).
 *
 * Returns 0 on every path. Each file is mapped read-only, searched, reported
 * and unmapped again before the next entry is examined.
 *
 * TODO: append matches to some data structure instead of just printing them out
 * then there can be sweet summaries of matches/files scanned/time/etc
 */
int search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
    /* TODO: don't just die. also make max depth configurable */
    if (depth > MAX_SEARCH_DEPTH) {
        log_err("Search depth greater than %i, giving up.", depth);
        exit(1);
    }
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    int results = 0;

    int fd = -1;
    off_t f_len = 0;
    char *buf = NULL; /* non-NULL only while a mapping for the current entry is live */
    int rv = 0;
    char *dir_full_path = NULL;
    size_t path_length = 0;
    int i;

    /* First pass: load ignore patterns from the ignore files in this directory. */
    results = scandir(path, &dir_list, &ignorefile_filter, &alphasort);
    if (results > 0) {
        for (i = 0; i < results; i++) {
            dir = dir_list[i];
            path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
            dir_full_path = malloc(path_length);
            snprintf(dir_full_path, path_length, "%s/%s", path, dir->d_name);
            load_ignore_patterns(dir_full_path);
            free(dir);
            dir = NULL;
            free(dir_full_path);
            dir_full_path = NULL;
        }
    }
    free(dir_list);
    dir_list = NULL;

    /* Second pass: the entries that survive filename_filter. */
    results = scandir(path, &dir_list, &filename_filter, &alphasort);
    if (results == 0)
    {
        log_debug("No results found in directory %s", path);
        free(dir_list);
        dir_list = NULL;
        return(0);
    }
    else if (results == -1) {
        log_err("Error opening directory %s", path);
        return(0);
    }

    match matches[MAX_MATCHES_PER_FILE];
    int matches_len = 0;
    int buf_len = 0;
    int buf_offset = 0;
    int offset_vector[MAX_MATCHES_PER_FILE * 3]; /* TODO */
    int rc = 0;
    struct stat statbuf;
    int binary = 0;

    for (i=0; i<results; i++) {
        matches_len = 0;
        buf_offset = 0;
        binary = 0;
        dir = dir_list[i];
        /* TODO: this is copy-pasted from about 30 lines above */
        path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
        dir_full_path = malloc(path_length);
        snprintf(dir_full_path, path_length, "%s/%s", path, dir->d_name);

        log_debug("dir %s type %i", dir_full_path, dir->d_type);
        /* TODO: scan files in current dir before going deeper */
        if (dir->d_type == DT_DIR) {
            if (opts.recurse_dirs) {
                log_debug("Searching dir %s", dir_full_path);
                rv = search_dir(re, re_extra, dir_full_path, depth + 1);
            }
            goto cleanup;
        }

        if (opts.file_search_regex) {
          rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                         buf_offset, 0, offset_vector, 3);
          if (rc < 0) { /* no match */
            log_debug("Skipping %s due to file_search_regex.", dir_full_path);
            goto cleanup;
          }
        }

        fd = open(dir_full_path, O_RDONLY);
        if (fd < 0) {
            log_err("Error opening file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        rv = fstat(fd, &statbuf);
        if (rv != 0) {
            log_err("Error fstat()ing file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        f_len = statbuf.st_size;

        if (f_len == 0) {
            log_debug("File %s is empty, skipping.", dir_full_path);
            goto cleanup;
        }
        else if (f_len > 1024 * 1024 * 1024) { /* 1 GB */
            log_err("File %s is too big. Skipping...", dir_full_path);
            goto cleanup;
        }

        buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0);
        if (buf == MAP_FAILED) {
            log_err("File %s failed to load: %s.", dir_full_path, strerror(errno));
            /* Nothing was mapped; make sure cleanup does not try to unmap. */
            buf = NULL;
            goto cleanup;
        }

        buf_len = f_len;

        if (is_binary((void*)buf, buf_len)) { /* Who needs duck typing when you have void cast? :) */
            if (opts.search_binary_files) {
                binary = 1;
            }
            else {
                log_debug("File %s is binary. Skipping...", dir_full_path);
                goto cleanup;
            }
        }

        if (opts.literal) {
            char *match_ptr = buf;
            char *(*ag_strncmp_fp)(const char*, const char*, size_t) = &ag_strnstr;
            if (opts.casing == CASE_INSENSITIVE) {
                ag_strncmp_fp = &ag_strncasestr;
            }
            while (buf_offset < buf_len) {
                match_ptr = ag_strncmp_fp(match_ptr, opts.query, buf_len - buf_offset);
                if (match_ptr == NULL) {
                    break;
                }
                matches[matches_len].start = match_ptr - buf;
                matches[matches_len].end = matches[matches_len].start + opts.query_len;
                buf_offset = matches[matches_len].end;
                matches_len++;
                match_ptr++;
                /* Don't segfault. TODO: realloc this array */
                if (matches_len >= MAX_MATCHES_PER_FILE) {
                    log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                    break;
                }
            }
        }
        else {
            /* In my profiling, most of the execution time is spent in this pcre_exec */
            while (buf_offset < buf_len &&
                 (rc = pcre_exec(re, re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
                log_debug("Match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
                buf_offset = offset_vector[1];
                matches[matches_len].start = offset_vector[0];
                matches[matches_len].end = offset_vector[1];
                matches_len++;
                /* Don't segfault. TODO: realloc this array */
                if (matches_len >= MAX_MATCHES_PER_FILE) {
                    log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                    break;
                }
            }
        }


        if (opts.stats) {
            total_file_count++;
            total_byte_count += buf_len;
        }

        if (rc == -1) {
            log_debug("No match in %s", dir_full_path);
        }

        if (matches_len > 0) {
            if (opts.print_filename_only) {
                print_path(dir_full_path);
            }
            else {
                if (binary) {
                    printf("Binary file %s matches.\n", dir_full_path);
                }
                else {
                    print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
                }
            }
        }

        cleanup:
        /* Unmap only a mapping that was really established for this entry.
         * Several early exits above reach this point with the file open but
         * not mapped; unmapping a stale address with the new f_len could tear
         * down unrelated mappings. */
        if (buf != NULL) {
            munmap(buf, f_len);
            buf = NULL;
        }
        if (fd != -1) {
            close(fd);
            fd = -1;
        }

        free(dir);
        dir = NULL;
        free(dir_full_path);
        dir_full_path = NULL;
    }

    free(dir_list);
    dir_list = NULL;
    return(0);
}
Example #4
0
/*
 * Walk directory `path`: load its ignore files, then hand every file to
 * search_file() and recurse into subdirectories.
 *
 *   re, re_extra - compiled query regex and its study data, passed on to
 *                  search_file() and to recursive calls.
 *   path         - directory to scan; if scanning fails with ENOTDIR, path is
 *                  passed to search_file() instead.
 *   depth        - current recursion depth, compared against
 *                  opts.max_search_depth.
 *
 * opts.file_search_regex filters files only; directories are always
 * descended into (subject to opts.recurse_dirs and the depth limit) so that
 * matching files deeper in the tree are still found.
 *
 * TODO: append matches to some data structure instead of just printing them out
 * then there can be sweet summaries of matches/files scanned/time/etc
 */
void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    int results = 0;

    char *dir_full_path = NULL;
    size_t path_length = 0;
    int i;

    /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */
#ifdef AG_OS_BSD
    results = scandir(path, &dir_list, &ignorefile_filter, &alphasort);
#else
    results = scandir(path, &dir_list, (int (*)(const struct dirent *))&ignorefile_filter, &alphasort);
#endif
    if (results > 0) {
        for (i = 0; i < results; i++) {
            dir = dir_list[i];
            path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
            dir_full_path = malloc(path_length);
            strlcpy(dir_full_path, path, path_length);
            strlcat(dir_full_path, "/", path_length);
            strlcat(dir_full_path, dir->d_name, path_length);
            load_ignore_patterns(dir_full_path);
            free(dir);
            dir = NULL;
            free(dir_full_path);
            dir_full_path = NULL;
        }
    }
    free(dir_list);
    dir_list = NULL;

#ifdef AG_OS_BSD
    results = scandir(path, &dir_list, &filename_filter, &alphasort);
#else
    results = scandir(path, &dir_list, (int (*)(const struct dirent *))&filename_filter, &alphasort);
#endif
    if (results == 0)
    {
        log_debug("No results found in directory %s", path);
        free(dir_list);
        dir_list = NULL;
        return;
    }
    else if (results == -1) {
        if (errno == ENOTDIR) {
            /* Not a directory. Probably a file. */
            /* If we're only searching one file, don't print the filename header at the top. */
            opts.print_heading = depth == 0 ? -1 : opts.print_heading;
            search_file(re, re_extra, path);
            return;
        }
        else {
            log_err("Error opening directory %s: %s", path, strerror(errno));
            return;
        }
    }

    int offset_vector[3];
    int rc = 0;
    struct stat stDirInfo;

    for (i = 0; i < results; i++) {
        dir = dir_list[i];
        /* TODO: this is copy-pasted from about 30 lines above */
        path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
        dir_full_path = malloc(path_length);
        strlcpy(dir_full_path, path, path_length);
        strlcat(dir_full_path, "/", path_length);
        strlcat(dir_full_path, dir->d_name, path_length);

        /* Some filesystems, e.g. ReiserFS, always return a type DT_UNKNOWN from readdir or scandir. */
        /* Call lstat if we find DT_UNKNOWN to get the information we need. */
        if (dir->d_type == DT_UNKNOWN) {
            if (lstat(dir_full_path, &stDirInfo) != -1) {
                if (S_ISDIR(stDirInfo.st_mode)) {
                    dir->d_type = DT_DIR;
                }
                else if (S_ISLNK(stDirInfo.st_mode)) {
                    dir->d_type = DT_LNK;
                }
            }
            else {
                log_err("lstat() failed on %s", dir_full_path);
                /* If lstat fails we may as well carry on and hope for the best. */
            }
        }

        /* Skip symlinks unless asked to follow them. This has to apply to
         * every entry, not only to those whose type had to be looked up with
         * lstat() above. */
        if (!opts.follow_symlinks && dir->d_type == DT_LNK) {
            log_debug("File %s ignored becaused it's a symlink", dir->d_name);
            goto cleanup;
        }

        /* If a link points to a directory then we need to treat it as a directory. */
        if (dir->d_type == DT_LNK) {
            if (stat(dir_full_path, &stDirInfo) != -1) {
                if (S_ISDIR(stDirInfo.st_mode)) {
                    dir->d_type = DT_DIR;
                }
            }
            else {
                log_err("stat() failed on %s", dir_full_path);
                /* If stat fails we may as well carry on and hope for the best. */
            }
        }

        log_debug("dir %s type %i", dir_full_path, dir->d_type);

        /* Directories are handled before the file-name regex is consulted:
         * the regex selects files, and must not prune whole subtrees. */
        /* TODO: scan files in current dir before going deeper */
        if (dir->d_type == DT_DIR) {
            if (opts.recurse_dirs) {
                if (depth < opts.max_search_depth) {
                    log_debug("Searching dir %s", dir_full_path);
                    search_dir(re, re_extra, dir_full_path, depth + 1);
                }
                else {
                    log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
                }
            }
            goto cleanup;
        }

        if (opts.file_search_regex) {
            rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                           0, 0, offset_vector, 3);
            if (rc < 0) { /* no match */
                log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                goto cleanup;
            }
        }

        search_file(re, re_extra, dir_full_path);

        cleanup:
        free(dir);
        dir = NULL;
        free(dir_full_path);
        dir_full_path = NULL;
    }

    free(dir_list);
    dir_list = NULL;
}
Example #5
0
/*
 * Walk directory `path`: load its ignore files, then hand every file to
 * search_file() and recurse into subdirectories.
 *
 *   re, re_extra - compiled query regex and its study data, passed on to
 *                  search_file() and to recursive calls.
 *   path         - directory to scan; if scanning fails with ENOTDIR, path is
 *                  passed to search_file() instead.
 *   depth        - current recursion depth, compared against
 *                  opts.max_search_depth.
 *
 * opts.file_search_regex filters files only; directories are always
 * descended into (subject to opts.recurse_dirs and the depth limit) so that
 * matching files deeper in the tree are still found.
 *
 * TODO: append matches to some data structure instead of just printing them out
 * then there can be sweet summaries of matches/files scanned/time/etc
 */
void search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    int results = 0;

    char *dir_full_path = NULL;
    size_t path_length = 0;
    int i;

    /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */
#ifdef AG_OS_BSD
    results = scandir(path, &dir_list, &ignorefile_filter, &alphasort);
#else
    results = scandir(path, &dir_list, (int (*)(const struct dirent *))&ignorefile_filter, &alphasort);
#endif
    if (results > 0) {
        for (i = 0; i < results; i++) {
            dir = dir_list[i];
            path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
            dir_full_path = malloc(path_length);
            strlcpy(dir_full_path, path, path_length);
            strlcat(dir_full_path, "/", path_length);
            strlcat(dir_full_path, dir->d_name, path_length);
            load_ignore_patterns(dir_full_path);
            free(dir);
            dir = NULL;
            free(dir_full_path);
            dir_full_path = NULL;
        }
    }
    free(dir_list);
    dir_list = NULL;

#ifdef AG_OS_BSD
    results = scandir(path, &dir_list, &filename_filter, &alphasort);
#else
    results = scandir(path, &dir_list, (int (*)(const struct dirent *))&filename_filter, &alphasort);
#endif
    if (results == 0)
    {
        log_debug("No results found in directory %s", path);
        free(dir_list);
        dir_list = NULL;
        return;
    }
    else if (results == -1) {
        if (errno == ENOTDIR) {
            /* Not a directory. Probably a file. */
            /* If we're only searching one file, don't print the filename header at the top. */
            opts.print_heading = depth == 0 ? -1 : opts.print_heading;
            search_file(re, re_extra, path);
            return;
        }
        else {
            log_err("Error opening directory %s: %s", path, strerror(errno));
            return;
        }
    }

    int offset_vector[3];
    int rc = 0;

    for (i=0; i<results; i++) {
        dir = dir_list[i];
        /* TODO: this is copy-pasted from about 30 lines above */
        path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
        dir_full_path = malloc(path_length);
        strlcpy(dir_full_path, path, path_length);
        strlcat(dir_full_path, "/", path_length);
        strlcat(dir_full_path, dir->d_name, path_length);

        log_debug("dir %s type %i", dir_full_path, dir->d_type);

        /* Directories are handled before the file-name regex is consulted:
         * the regex selects files, and must not prune whole subtrees. */
        /* TODO: scan files in current dir before going deeper */
        if (dir->d_type == DT_DIR) {
            if (opts.recurse_dirs) {
                if (depth < opts.max_search_depth) {
                    log_debug("Searching dir %s", dir_full_path);
                    search_dir(re, re_extra, dir_full_path, depth + 1);
                }
                else {
                    log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
                }
            }
            goto cleanup;
        }

        if (opts.file_search_regex) {
            rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                           0, 0, offset_vector, 3);
            if (rc < 0) { /* no match */
                log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                goto cleanup;
            }
        }

        search_file(re, re_extra, dir_full_path);

        cleanup:
        free(dir);
        dir = NULL;
        free(dir_full_path);
        dir_full_path = NULL;
    }

    free(dir_list);
    dir_list = NULL;
}
Example #6
0
//TODO: append matches to some data structure instead of just printing them out
// then there can be sweet summaries of matches/files scanned/time/etc

// Join a directory and an entry name into a newly allocated "dir/name" string.
// Returns NULL if the allocation fails; the caller frees the result.
static char *search_dir_join_path(const char *dir, const char *name) {
    size_t full_len = strlen(dir) + strlen(name) + 2; // 2 for slash and null char
    char *full_path = malloc(full_len);
    if (full_path != NULL) {
        snprintf(full_path, full_len, "%s/%s", dir, name);
    }
    return full_path;
}

// Search every entry of `path` for matches of `re`, printing each match.
//
// Two passes are made over the directory: the first hands every entry accepted
// by ignorefile_filter to load_ignore_patterns(); the second walks the entries
// accepted by filename_filter, recursing into sub-directories when
// opts.recurse_dirs is set and scanning everything else as a file.
//
// re:    compiled pattern handed to pcre_exec
// path:  directory to scan
// depth: current recursion depth; the process exits once it exceeds
//        MAX_SEARCH_DEPTH
//
// Always returns 0; per-entry failures are logged and the entry is skipped.
int search_dir(pcre *re, const char* path, const int depth) {
    //TODO: don't just die. also make max depth configurable
    if (depth > MAX_SEARCH_DEPTH) {
        log_err("Search depth greater than %i, giving up.", depth);
        exit(1);
    }

    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    char *dir_full_path = NULL;
    int results = 0;

    // Pass 1: load ignore patterns. A failed scandir (-1) simply skips the loop.
    results = scandir(path, &dir_list, &ignorefile_filter, &alphasort);
    for (int i = 0; i < results; i++) {
        dir = dir_list[i];
        dir_full_path = search_dir_join_path(path, dir->d_name);
        if (dir_full_path != NULL) {
            load_ignore_patterns(dir_full_path);
        }
        else {
            log_err("Out of memory building path for %s. Skipping...", dir->d_name);
        }
        free(dir);
        free(dir_full_path);
        dir_full_path = NULL;
    }
    free(dir_list);
    dir_list = NULL; // never leave a dangling pointer around for the second scandir

    // Pass 2: the entries that should actually be searched.
    results = scandir(path, &dir_list, &filename_filter, &alphasort);
    if (results == 0) {
        log_debug("No results found");
        free(dir_list);
        return(0);
    }
    else if (results == -1) {
        log_err("Error opening directory %s", path);
        return(0);
    }

    for (int i = 0; i < results; i++) {
        // Per-entry resources live inside the loop so that a stale handle from a
        // previous iteration can never be closed or freed a second time.
        FILE *fp = NULL;
        char *buf = NULL;
        long f_len = 0;
        size_t r_len = 0;

        dir = dir_list[i];
        dir_full_path = search_dir_join_path(path, dir->d_name);
        if (dir_full_path == NULL) {
            log_err("Out of memory building path for %s. Skipping...", dir->d_name);
            goto cleanup;
        }

        log_debug("dir %s type %i", dir_full_path, dir->d_type);
        //TODO: scan files in current dir before going deeper
        if (dir->d_type == DT_DIR && opts.recurse_dirs) {
            log_debug("searching dir %s", dir_full_path);
            (void)search_dir(re, dir_full_path, depth + 1);
            goto cleanup;
        }

        fp = fopen(dir_full_path, "r");
        if (fp == NULL) {
            log_err("Error opening file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        if (fseek(fp, 0, SEEK_END) != 0) {
            log_err("Error fseek()ing file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        //TODO: behave differently if file is HUGE. on 32 bit, anything > 2GB will screw up this program
        f_len = ftell(fp);
        if (f_len < 0) {
            // ftell reports failure as -1; using that as a size would be disastrous
            log_err("Error ftell()ing file %s. Skipping...", dir_full_path);
            goto cleanup;
        }
        if (f_len == 0) {
            log_debug("file is empty. skipping");
            goto cleanup;
        }

        rewind(fp);
        buf = malloc((size_t)f_len + 1);
        if (buf == NULL) {
            log_err("Out of memory reading file %s. Skipping...", dir_full_path);
            goto cleanup;
        }
        r_len = fread(buf, 1, (size_t)f_len, fp);
        buf[r_len] = '\0';

        {
            int buf_len = (int)r_len;
            int buf_offset = 0;
            int offset_vector[MAX_MATCHES_PER_FILE * 2]; //XXXX
            // pcre_exec wants the number of ints in the vector, not its size in bytes
            const int ovec_count = (int)(sizeof(offset_vector) / sizeof(offset_vector[0]));
            int rc = 0;
            char *match_start = NULL;
            char *match_end = NULL;
            int first_match = 1;

            // In my profiling, most of the execution time is spent in this pcre_exec
            while (buf_offset < buf_len &&
                   (rc = pcre_exec(re, NULL, buf, buf_len, buf_offset, 0, offset_vector, ovec_count)) >= 0) {
                log_debug("Match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
                match_start = buf + offset_vector[0];
                match_end = buf + offset_vector[1];
                print_match(dir_full_path, buf, match_start, match_end, first_match);
                first_match = 0;

                buf_offset = offset_vector[1];
                if (offset_vector[1] == offset_vector[0]) {
                    // An empty match would be found again at the same offset
                    // forever; step past it.
                    buf_offset++;
                }
            }
        }

        cleanup:
        free(buf);
        if (fp != NULL) {
            fclose(fp);
        }
        free(dir);
        free(dir_full_path);
        dir_full_path = NULL;
    }

    free(dir_list);
    return(0);
}
Example #7
0
/*
 * Parse the command line into the global `opts` and build the list of paths
 * to search.
 *
 * argc/argv: the program's arguments, as passed to main().
 * paths:     out parameter. On return *paths points at a newly allocated,
 *            NULL-terminated array of strdup()ed path strings (trailing
 *            slashes removed). When no path is given it holds just ".".
 *            This function never frees the array or its strings.
 *
 * Exits the process with status 1 on usage errors (too few arguments, unknown
 * option, missing or empty query, allocation failure) and with status 0 after
 * printing help or version information.
 */
void parse_options(int argc, char **argv, char **paths[]) {
    int ch;
    int i;
    size_t path_len = 0;
    int useless = 0;
    int group = 1;
    int help = 0;
    int version = 0;
    int opt_index = 0;
    const char *home_dir = getenv("HOME");
    char *ignore_file_path = NULL;
    int needs_query = 1;
    char *num_end = NULL;
    long context_arg = 0;

    init_options();

    struct option longopts[] = {
        { "ackmate", no_argument, &(opts.ackmate), 1 },
        { "ackmate-dir-filter", required_argument, NULL, 0 },
        { "after", required_argument, NULL, 'A' },
        { "all-text", no_argument, NULL, 't' },
        { "all-types", no_argument, NULL, 'a' },
        { "before", required_argument, NULL, 'B' },
        { "break", no_argument, &(opts.print_break), 1 },
        { "case-sensitive", no_argument, NULL, 's' },
        { "color", no_argument, &(opts.color), 1 },
        { "column", no_argument, &(opts.column), 1 },
        { "context", optional_argument, NULL, 'C' },
        { "debug", no_argument, NULL, 'D' },
        { "depth", required_argument, NULL, 0 },
        { "file-search-regex", required_argument, NULL, 'G' },
        { "files-with-matches", no_argument, NULL, 'l' },
        { "files-without-matches", no_argument, NULL, 'L' },
        { "follow", no_argument, &(opts.follow_symlinks), 1 },
        { "group", no_argument, &(group), 1 },
        { "heading", no_argument, &(opts.print_heading), 1 },
        { "help", no_argument, NULL, 'h' },
        { "hidden", no_argument, &(opts.search_hidden_files), 1 },
        { "ignore", required_argument, NULL, 0 },
        { "ignore-case", no_argument, NULL, 'i' },
        { "invert-match", no_argument, &(opts.invert_match), 1 },
        { "literal", no_argument, NULL, 'Q' },
        { "match", no_argument, &useless, 0 },
        { "max-count", required_argument, NULL, 'm' },
        { "no-recurse", no_argument, NULL, 'n' },
        { "nobreak", no_argument, &(opts.print_break), 0 },
        { "nocolor", no_argument, &(opts.color), 0 },
        { "nofollow", no_argument, &(opts.follow_symlinks), 0 },
        { "nogroup", no_argument, &(group), 0 },
        { "noheading", no_argument, &(opts.print_heading), 0 },
        { "parallel", no_argument, &(opts.parallel), 1},
        { "path-to-agignore", required_argument, NULL, 'p'},
        { "print-long-lines", no_argument, &(opts.print_long_lines), 1 },
        { "recurse", no_argument, NULL, 'r' },
        { "search-binary", no_argument, &(opts.search_binary_files), 1 },
        { "search-files", no_argument, &(opts.search_stream), 0 },
        { "skip-vcs-ignores", no_argument, NULL, 'U' },
        { "smart-case", no_argument, NULL, 'S' },
        { "stats", no_argument, &(opts.stats), 1 },
        { "unrestricted", no_argument, NULL, 'u' },
        { "version", no_argument, &version, 1 },
        { "word-regexp", no_argument, NULL, 'w' },
        { "workers", required_argument, NULL, 0 },
        { NULL, 0, NULL, 0 }
    };

    if (argc < 2) {
        usage();
        exit(1);
    }

    /* stdin isn't a tty. something's probably being piped to ag */
    if (!isatty(fileno(stdin))) {
        opts.search_stream = 1;
    }

    /* If we're not outputting to a terminal. change output to:
        * turn off colors
        * print filenames on every line
     */
    if (!isatty(fileno(stdout))) {
        opts.color = 0;
        group = 0;
    }

    while ((ch = getopt_long(argc, argv, "A:aB:C:DG:g:fhiLlm:np:QRrSsvVtuUw", longopts, &opt_index)) != -1) {
        switch (ch) {
            case 'A':
                opts.after = atoi(optarg);
                break;
            case 'a':
                opts.search_all_files = 1;
                opts.search_binary_files = 1;
                break;
            case 'B':
                opts.before = atoi(optarg);
                break;
            case 'C':
                if (optarg) {
                    /* Use strtol's end pointer to detect "not a number": atoi
                     * gives no reliable error signal, and errno may still hold
                     * a value left behind by an earlier call. */
                    context_arg = strtol(optarg, &num_end, 10);
                    if (num_end == optarg) {
                        /* This arg must be the search string instead of the context length.
                         * Only hand it back to getopt when it was a separate argv
                         * element; rewinding onto an attached argument ("-Cfoo",
                         * "--context=foo") would re-parse this same option forever. */
                        if (optind > 0 && argv[optind - 1] == optarg) {
                            optind--;
                        }
                        opts.context = DEFAULT_CONTEXT_LEN;
                    }
                    else {
                        opts.context = (int)context_arg;
                    }
                }
                else {
                    opts.context = DEFAULT_CONTEXT_LEN;
                }
                break;
            case 'D':
                set_log_level(LOG_LEVEL_DEBUG);
                break;
            case 'f':
                opts.follow_symlinks = 1;
                break;
            case 'g':
                needs_query = 0;
                opts.match_files = 1;
                /* Fall through and build regex */
            case 'G':
                compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, optarg, 0, 0);
                break;
            case 'h':
                help = 1;
                break;
            case 'i':
                opts.casing = CASE_INSENSITIVE;
                break;
            case 'L':
                opts.invert_match = 1;
                /* fall through */
            case 'l':
                opts.print_filename_only = 1;
                break;
            case 'm':
                opts.max_matches_per_file = atoi(optarg);
                break;
            case 'n':
                opts.recurse_dirs = 0;
                break;
            case 'p':
                opts.path_to_agignore = optarg;
                break;
            case 'Q':
                opts.literal = 1;
                break;
            case 'R':
            case 'r':
                opts.recurse_dirs = 1;
                break;
            case 'S':
                opts.casing = CASE_SMART;
                break;
            case 's':
                opts.casing = CASE_SENSITIVE;
                break;
            case 't':
                opts.search_all_files = 1;
                break;
            case 'u':
                opts.search_binary_files = 1;
                opts.search_all_files = 1;
                opts.search_hidden_files = 1;
                break;
            case 'U':
                opts.skip_vcs_ignores = 1;
                break;
            case 'v':
                opts.invert_match = 1;
                break;
            case 'V':
                version = 1;
                break;
            case 'w':
                opts.word_regexp = 1;
                break;
            case 0: /* Long option */
                if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) {
                    compile_study(&opts.ackmate_dir_filter, &opts.ackmate_dir_filter_extra, optarg, 0, 0);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "depth") == 0) {
                    opts.max_search_depth = atoi(optarg);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "ignore") == 0) {
                    add_ignore_pattern(root_ignores, optarg);
                    break;
                }
                else if (strcmp(longopts[opt_index].name, "workers") == 0) {
                    opts.workers = atoi(optarg);
                    break;
                }
                /* Flag-style long options were already stored by getopt_long */
                if (longopts[opt_index].flag != 0) {
                    break;
                }
                /* Continue to usage if we don't recognize the option */
                log_err("option %s does not take a value", longopts[opt_index].name);
                /* fall through */
            default:
                usage();
                exit(1);
        }
    }

    argc -= optind;
    argv += optind;

    if (help) {
        usage();
        exit(0);
    }

    if (version) {
        print_version();
        exit(0);
    }

    if (needs_query && argc == 0) {
        log_err("What do you want to search for?");
        exit(1);
    }

    if (home_dir && !opts.search_all_files) {
        log_debug("Found user's home dir: %s", home_dir);
        /* On failure asprintf leaves the pointer undefined, so neither use nor free it. */
        if (asprintf(&ignore_file_path, "%s/%s", home_dir, ignore_pattern_files[0]) != -1) {
            load_ignore_patterns(root_ignores, ignore_file_path);
            free(ignore_file_path);
        }
        else {
            log_err("Out of memory building ignore file path in %s", home_dir);
        }
    }

    if (opts.context > 0) {
        opts.before = opts.context;
        opts.after = opts.context;
    }

    if (opts.ackmate) {
        opts.color = 0;
        opts.print_break = 1;
        group = 1;
        opts.search_stream = 0;
    }

    if (opts.parallel) {
        opts.search_stream = 0;
    }

    /* If either heading or break is already 0, leave both as they are;
     * otherwise derive both from the group setting. */
    if (opts.print_heading != 0 && opts.print_break != 0) {
        if (group) {
            opts.print_heading = 1;
            opts.print_break = 1;
        }
        else {
            opts.print_heading = 0;
            opts.print_break = 0;
        }
    }

    if (opts.search_stream) {
        opts.print_break = 0;
        opts.print_heading = 0;
        opts.print_line_numbers = 0;
    }

    if (needs_query) {
        opts.query = strdup(argv[0]);
        argc--;
        argv++;
    }
    else {
        opts.query = strdup(".");
    }
    if (opts.query == NULL) {
        log_err("Out of memory while copying query");
        exit(1);
    }
    opts.query_len = strlen(opts.query);

    log_debug("Query is %s", opts.query);

    if (opts.query_len == 0) {
        log_err("Error: No query. What do you want to search for?");
        exit(1);
    }

    if (!is_regex(opts.query)) {
        opts.literal = 1;
    }

    char *path = NULL;
    opts.paths_len = argc;
    if (argc > 0) {
        *paths = calloc((size_t)argc + 1, sizeof(char*));
        if (*paths == NULL) {
            log_err("Out of memory while allocating path list");
            exit(1);
        }
        for (i = 0; i < argc; i++) {
            path = strdup(argv[i]);
            if (path == NULL) {
                log_err("Out of memory while copying path %s", argv[i]);
                exit(1);
            }
            path_len = strlen(path);
            /* kill trailing slash */
            if (path_len > 1 && path[path_len - 1] == '/') {
              path[path_len - 1] = '\0';
            }
            (*paths)[i] = path;
        }
        (*paths)[i] = NULL;
        /* Make sure we search these paths instead of stdin. */
        opts.search_stream = 0;
    }
    else {
        path = strdup(".");
        *paths = malloc(sizeof(char*) * 2);
        if (path == NULL || *paths == NULL) {
            log_err("Out of memory while allocating path list");
            exit(1);
        }
        (*paths)[0] = path;
        (*paths)[1] = NULL;
    }
}