Example #1
0
/*
 * Search one in-memory buffer for opts.query and print the result.
 *
 * buf            start of the data to search (never written through)
 * buf_len        number of bytes available at buf
 * dir_full_path  path used in log messages and handed to the print helpers
 *
 * Unless opts.search_stream or opts.search_binary_files is set, a buffer that
 * is_binary() flags is skipped without being searched. Match offsets are
 * collected in a heap array that grows on demand and is released before
 * returning. Statistics are updated under stats_mtx and output is produced
 * under print_mtx.
 */
void search_buf(const char *buf, const int buf_len,
                const char *dir_full_path) {
    int binary = -1;  /* 1 = yes, 0 = no, -1 = don't know */
    int buf_offset = 0;

    if (opts.search_stream) {
        binary = 0;
    } else if (!opts.search_binary_files) {
        binary = is_binary((void*) buf, buf_len);
        if (binary) {
            log_debug("File %s is binary. Skipping...", dir_full_path);
            return;
        }
    }

    int matches_len = 0;
    match *matches;
    size_t matches_size;
    size_t matches_spare;

    if (opts.invert_match) {
        /* If we are going to invert the set of matches at the end, we will need
         * one extra match struct, even if there are no matches at all. So make
         * sure we have a nonempty array; and make sure we always have spare
         * capacity for one extra.
         */
        matches_size = 100;
        matches = ag_malloc(matches_size * sizeof(match));
        matches_spare = 1;
    } else {
        matches_size = 0;
        matches = NULL;
        matches_spare = 0;
    }

    if (opts.literal) {
        const char *match_ptr = buf;
        strncmp_fp ag_strnstr_fp = get_strstr(opts);

        /* match_ptr and buf_offset are advanced together so that
         * buf_len - buf_offset is always the number of bytes left at match_ptr.
         */
        while (buf_offset < buf_len) {
            match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup);
            if (match_ptr == NULL) {
                break;
            }

            if (opts.word_regexp) {
                const char *start = match_ptr;
                const char *end = match_ptr + opts.query_len;

                /* Check whether both start and end of the match lie on a word
                 * boundary
                 */
                if ((start == buf ||
                        is_wordchar(*(start - 1)) != opts.literal_starts_wordchar)
                        &&
                        (end == buf + buf_len ||
                         is_wordchar(*end) != opts.literal_ends_wordchar))
                {
                    /* It's a match */
                } else {
                    /* It's not a match */
                    match_ptr += opts.query_len;
                    buf_offset = end - buf;
                    continue;
                }
            }

            /* Grow before storing; the first allocation happens here when
             * matches is still NULL.
             */
            if ((size_t)matches_len + matches_spare >= matches_size) {
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = match_ptr - buf;
            matches[matches_len].end = matches[matches_len].start + opts.query_len;
            buf_offset = matches[matches_len].end;
            log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start);
            matches_len++;
            match_ptr += opts.query_len;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    } else {
        /* Only the overall match is needed, so three slots are enough. */
        int offset_vector[3];
        while (buf_offset < buf_len &&
                pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3) >= 0) {
            log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
            buf_offset = offset_vector[1];
            if (offset_vector[0] == offset_vector[1]) {
                /* An empty match ends where it starts. Without this step the
                 * next pcre_exec() would start at the same offset and report
                 * the same empty match again, until max_matches_per_file.
                 */
                ++buf_offset;
                log_debug("Regex match is of length zero. Advancing offset one byte.");
            }

            /* TODO: copy-pasted from above. FIXME */
            if ((size_t)matches_len + matches_spare >= matches_size) {
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = offset_vector[0];
            matches[matches_len].end = offset_vector[1];
            matches_len++;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    }

    if (opts.invert_match) {
        matches_len = invert_matches(matches, matches_len, buf_len);
    }

    if (opts.stats) {
        pthread_mutex_lock(&stats_mtx);
        stats.total_bytes += buf_len;
        stats.total_files++;
        stats.total_matches += matches_len;
        pthread_mutex_unlock(&stats_mtx);
    }

    if (matches_len > 0) {
        /* The binary check was skipped above when binary files are searched;
         * run it now, but only if the answer changes what gets printed.
         */
        if (binary == -1 && !opts.print_filename_only) {
            binary = is_binary((void*) buf, buf_len);
        }
        pthread_mutex_lock(&print_mtx);
        if (opts.print_filename_only) {
            print_path(dir_full_path, '\n');
        } else if (binary) {
            print_binary_file_matches(dir_full_path);
        } else {
            print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
        }
        pthread_mutex_unlock(&print_mtx);
    } else {
        log_debug("No match in %s", dir_full_path);
    }

    /* matches is NULL when nothing was ever allocated; free(NULL) is a no-op. */
    free(matches);
}
Example #2
0
/*
 * Search one in-memory buffer and print the result.
 *
 * re, re_extra   compiled pattern handed to pcre_exec() when opts.literal is
 *                not set
 * buf            start of the data to search
 * buf_len        number of bytes available at buf
 * dir_full_path  path used in log messages and handed to the print helpers
 *
 * A buffer that is_binary() flags is skipped unless opts.search_binary_files
 * is set. At most opts.max_matches_per_file matches are recorded.
 */
void search_buf(const pcre *re, const pcre_extra *re_extra,
                const char *buf, const int buf_len,
                const char *dir_full_path) {
    int binary = 0;
    int buf_offset = 0;
    /* NOTE(review): this array lives on the stack and invert_matches() below
     * rewrites it in place; confirm that inversion never needs more than
     * opts.max_matches_per_file entries. */
    match matches[opts.max_matches_per_file];
    int matches_len = 0;
    /* Only the overall match is requested from pcre_exec(), so three slots
     * are enough. */
    int offset_vector[3];

    if (is_binary((void*)buf, buf_len)) { /* Who needs duck typing when you have void cast? :) */
        if (opts.search_binary_files) {
            binary = 1;
        }
        else {
            log_debug("File %s is binary. Skipping...", dir_full_path);
            return;
        }
    }

    if (opts.literal) {
        const char *match_ptr = buf;
        char *(*ag_strncmp_fp)(const char*, const char*, const size_t, const size_t, const size_t[]) = &boyer_moore_strnstr;

        if (opts.casing == CASE_INSENSITIVE) {
            ag_strncmp_fp = &boyer_moore_strncasestr;
        }
        /* match_ptr and buf_offset are advanced together so that
         * buf_len - buf_offset is always the number of bytes left at match_ptr.
         */
        while (buf_offset < buf_len) {
            match_ptr = ag_strncmp_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup);
            if (match_ptr == NULL) {
                break;
            }
            matches[matches_len].start = match_ptr - buf;
            matches[matches_len].end = matches[matches_len].start + opts.query_len;
            buf_offset = matches[matches_len].end;
            log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start);
            matches_len++;
            match_ptr += opts.query_len;
            /* Don't segfault. TODO: realloc this array */
            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    }
    else {
        /* In my profiling, most of the execution time is spent in this pcre_exec */
        while (buf_offset < buf_len &&
              pcre_exec(re, re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3) >= 0) {
            log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
            buf_offset = offset_vector[1];
            if (offset_vector[0] == offset_vector[1]) {
                /* An empty match ends where it starts. Without this step the
                 * next pcre_exec() would start at the same offset and report
                 * the same empty match again, until max_matches_per_file.
                 */
                ++buf_offset;
                log_debug("Regex match is of length zero. Advancing offset one byte.");
            }
            matches[matches_len].start = offset_vector[0];
            matches[matches_len].end = offset_vector[1];
            matches_len++;
            /* Don't segfault. TODO: realloc this array */
            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    }

    if (opts.invert_match) {
        matches_len = invert_matches(matches, matches_len, buf_len);
    }

    if (opts.stats) {
        stats.total_bytes += buf_len;
        stats.total_files++;
        stats.total_matches += matches_len;
    }

    if (matches_len > 0) {
        if (opts.print_filename_only) {
            print_path(dir_full_path, '\n');
        }
        else {
            if (binary) {
                print_binary_file_matches(dir_full_path);
            }
            else {
                print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
            }
        }
    }
    else {
        log_debug("No match in %s", dir_full_path);
    }
}
Example #3
0
/*
 * Search every entry of the directory `path` that passes filename_filter,
 * descending into subdirectories when opts.recurse_dirs is set.
 *
 * re, re_extra  compiled pattern handed to pcre_exec() when opts.literal is
 *               not set
 * path          directory to scan
 * depth         current recursion depth; the process exits once it exceeds
 *               MAX_SEARCH_DEPTH
 *
 * Entries accepted by ignorefile_filter are first handed to
 * load_ignore_patterns(). Each remaining regular entry is mapped read-only,
 * searched, and its matches printed. Always returns 0.
 *
 * TODO: append matches to some data structure instead of just printing them out
 * then there can be sweet summaries of matches/files scanned/time/etc
 */
int search_dir(const pcre *re, const pcre_extra *re_extra, const char* path, const int depth) {
    /* TODO: don't just die. also make max depth configurable */
    if (depth > MAX_SEARCH_DEPTH) {
        log_err("Search depth greater than %i, giving up.", depth);
        exit(1);
    }
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    int results = 0;

    int fd = -1;
    off_t f_len = 0;
    char *buf = NULL; /* non-NULL only while a file is mapped */
    int rv = 0;
    char *dir_full_path = NULL;
    size_t path_length = 0;
    int i;

    /* First pass: load ignore patterns from the entries that
     * ignorefile_filter accepts. */
    results = scandir(path, &dir_list, &ignorefile_filter, &alphasort);
    if (results > 0) {
        for (i = 0; i < results; i++) {
            dir = dir_list[i];
            path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
            dir_full_path = malloc(path_length);
            if (dir_full_path == NULL) {
                log_err("Out of memory building path for %s/%s. Skipping...", path, dir->d_name);
                free(dir);
                dir = NULL;
                continue;
            }
            snprintf(dir_full_path, path_length, "%s/%s", path, dir->d_name);
            load_ignore_patterns(dir_full_path);
            free(dir);
            dir = NULL;
            free(dir_full_path);
            dir_full_path = NULL;
        }
    }
    free(dir_list);
    dir_list = NULL;

    /* Second pass: the entries that are actually searched. */
    results = scandir(path, &dir_list, &filename_filter, &alphasort);
    if (results == 0)
    {
        log_debug("No results found in directory %s", path);
        free(dir_list);
        dir_list = NULL;
        return(0);
    }
    else if (results == -1) {
        log_err("Error opening directory %s", path);
        return(0);
    }

    match matches[MAX_MATCHES_PER_FILE];
    int matches_len = 0;
    int buf_len = 0;
    int buf_offset = 0;
    /* Only the overall match is requested from pcre_exec(), so three slots
     * are enough. */
    int offset_vector[3];
    int rc = 0;
    struct stat statbuf;
    int binary = 0;

    for (i=0; i<results; i++) {
        matches_len = 0;
        buf_offset = 0;
        binary = 0;
        dir = dir_list[i];
        /* TODO: this is copy-pasted from about 30 lines above */
        path_length = (size_t)(strlen(path) + strlen(dir->d_name) + 2); /* 2 for slash and null char */
        dir_full_path = malloc(path_length);
        if (dir_full_path == NULL) {
            log_err("Out of memory building path for %s/%s. Skipping...", path, dir->d_name);
            goto cleanup;
        }
        snprintf(dir_full_path, path_length, "%s/%s", path, dir->d_name);

        log_debug("dir %s type %i", dir_full_path, dir->d_type);
        /* TODO: scan files in current dir before going deeper */
        if (dir->d_type == DT_DIR) {
            if (opts.recurse_dirs) {
                log_debug("Searching dir %s", dir_full_path);
                rv = search_dir(re, re_extra, dir_full_path, depth + 1);
            }
            goto cleanup;
        }

        if (opts.file_search_regex) {
          rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                         buf_offset, 0, offset_vector, 3);
          if (rc < 0) { /* no match */
            log_debug("Skipping %s due to file_search_regex.", dir_full_path);
            goto cleanup;
          }
        }

        fd = open(dir_full_path, O_RDONLY);
        if (fd < 0) {
            log_err("Error opening file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        rv = fstat(fd, &statbuf);
        if (rv != 0) {
            log_err("Error fstat()ing file %s. Skipping...", dir_full_path);
            goto cleanup;
        }

        f_len = statbuf.st_size;

        if (f_len == 0) {
            log_debug("File %s is empty, skipping.", dir_full_path);
            goto cleanup;
        }
        else if (f_len > 1024 * 1024 * 1024) { /* 1 GB */
            log_err("File %s is too big. Skipping...", dir_full_path);
            goto cleanup;
        }

        buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0);
        if (buf == MAP_FAILED) {
            log_err("File %s failed to load: %s.", dir_full_path, strerror(errno));
            buf = NULL; /* nothing to unmap in cleanup */
            goto cleanup;
        }

        buf_len = f_len;

        if (is_binary((void*)buf, buf_len)) { /* Who needs duck typing when you have void cast? :) */
            if (opts.search_binary_files) {
                binary = 1;
            }
            else {
                log_debug("File %s is binary. Skipping...", dir_full_path);
                goto cleanup;
            }
        }

        if (opts.literal) {
            char *match_ptr = buf;
            char *(*ag_strncmp_fp)(const char*, const char*, size_t) = &ag_strnstr;
            if (opts.casing == CASE_INSENSITIVE) {
                ag_strncmp_fp = &ag_strncasestr;
            }
            while (buf_offset < buf_len) {
                match_ptr = ag_strncmp_fp(match_ptr, opts.query, buf_len - buf_offset);
                if (match_ptr == NULL) {
                    break;
                }
                matches[matches_len].start = match_ptr - buf;
                matches[matches_len].end = matches[matches_len].start + opts.query_len;
                buf_offset = matches[matches_len].end;
                matches_len++;
                /* Resume right after this match so that match_ptr stays equal
                 * to buf + buf_offset and the length passed above is the
                 * number of bytes actually left at match_ptr.
                 * NOTE(review): assumes the third argument of ag_strnstr /
                 * ag_strncasestr is the haystack length -- confirm. */
                match_ptr += opts.query_len;
                /* Don't segfault. TODO: realloc this array */
                if (matches_len >= MAX_MATCHES_PER_FILE) {
                    log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                    break;
                }
            }
        }
        else {
            /* In my profiling, most of the execution time is spent in this pcre_exec */
            while (buf_offset < buf_len &&
                 pcre_exec(re, re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3) >= 0) {
                log_debug("Match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
                buf_offset = offset_vector[1];
                if (offset_vector[0] == offset_vector[1]) {
                    /* An empty match ends where it starts. Without this step
                     * the next pcre_exec() would start at the same offset and
                     * report the same empty match again, until
                     * MAX_MATCHES_PER_FILE.
                     */
                    ++buf_offset;
                    log_debug("Regex match is of length zero. Advancing offset one byte.");
                }
                matches[matches_len].start = offset_vector[0];
                matches[matches_len].end = offset_vector[1];
                matches_len++;
                /* Don't segfault. TODO: realloc this array */
                if (matches_len >= MAX_MATCHES_PER_FILE) {
                    log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                    break;
                }
            }
        }


        if (opts.stats) {
            total_file_count++;
            total_byte_count += buf_len;
        }

        if (matches_len > 0) {
            if (opts.print_filename_only) {
                print_path(dir_full_path);
            }
            else {
                if (binary) {
                    printf("Binary file %s matches.\n", dir_full_path);
                }
                else {
                    print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
                }
            }
        }
        else {
            log_debug("No match in %s", dir_full_path);
        }

        cleanup:
        /* Unmap only what this iteration mapped: the file may have been
         * opened without ever reaching (or succeeding at) mmap(). */
        if (buf != NULL) {
            munmap(buf, f_len);
            buf = NULL;
        }
        if (fd != -1) {
            close(fd);
            fd = -1;
        }

        free(dir);
        dir = NULL;
        free(dir_full_path);
        dir_full_path = NULL;
    }

    free(dir_list);
    dir_list = NULL;
    return(0);
}
Example #4
0
/*
 * Search one in-memory buffer for opts.query and print the result.
 *
 * buf            start of the data to search (never written through)
 * buf_len        number of bytes available at buf
 * dir_full_path  path used in log messages and handed to the print helpers
 *
 * Unless opts.search_stream or opts.search_binary_files is set, a buffer that
 * is_binary() flags is skipped without being searched. Match offsets are
 * collected in a heap array that grows on demand and is released before
 * returning. Statistics are updated under stats_mtx and output is produced
 * under print_mtx.
 */
void search_buf(const char *buf, const size_t buf_len,
                const char *dir_full_path) {
    int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
    size_t buf_offset = 0;

    if (opts.search_stream) {
        binary = 0;
    } else if (!opts.search_binary_files) {
        binary = is_binary((const void *)buf, buf_len);
        if (binary) {
            log_debug("File %s is binary. Skipping...", dir_full_path);
            return;
        }
    }

    int matches_len = 0;
    match *matches;
    size_t matches_size;
    size_t matches_spare;

    if (opts.invert_match) {
        /* If we are going to invert the set of matches at the end, we will need
         * one extra match struct, even if there are no matches at all. So make
         * sure we have a nonempty array; and make sure we always have spare
         * capacity for one extra.
         */
        matches_size = 100;
        matches = ag_malloc(matches_size * sizeof(match));
        matches_spare = 1;
    } else {
        matches_size = 0;
        matches = NULL;
        matches_spare = 0;
    }

    if (!opts.literal && opts.query_len == 1 && opts.query[0] == '.') {
        /* The regex "." is answered with a single match spanning the whole
         * buffer instead of running pcre_exec(). This is a regex-only
         * shortcut: in a literal search "." is just a dot.
         * Keep the array allocated above for invert_match, if any: replacing
         * it would leak it and drop the spare slot that inversion relies on.
         */
        if (matches == NULL) {
            matches_size = 1;
            matches = ag_malloc(matches_size * sizeof(match));
        }
        matches[0].start = 0;
        matches[0].end = buf_len;
        matches_len = 1;
    } else if (opts.literal) {
        const char *match_ptr = buf;
        strncmp_fp ag_strnstr_fp = get_strstr(opts.casing);

        /* match_ptr and buf_offset are advanced together so that
         * buf_len - buf_offset is always the number of bytes left at match_ptr.
         */
        while (buf_offset < buf_len) {
            match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup);
            if (match_ptr == NULL) {
                break;
            }

            if (opts.word_regexp) {
                const char *start = match_ptr;
                const char *end = match_ptr + opts.query_len;

                /* Check whether both start and end of the match lie on a word
                 * boundary
                 */
                if ((start == buf ||
                     is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) &&
                    (end == buf + buf_len ||
                     is_wordchar(*end) != opts.literal_ends_wordchar)) {
                    /* It's a match */
                } else {
                    /* It's not a match */
                    match_ptr += opts.query_len;
                    buf_offset = end - buf;
                    continue;
                }
            }

            if ((size_t)matches_len + matches_spare >= matches_size) {
                /* TODO: benchmark initial size of matches. 100 may be too small/big */
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = match_ptr - buf;
            matches[matches_len].end = matches[matches_len].start + opts.query_len;
            buf_offset = matches[matches_len].end;
            log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start);
            matches_len++;
            match_ptr += opts.query_len;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    } else {
        /* Only the overall match is needed, so three slots are enough. */
        int offset_vector[3];
        while (buf_offset < buf_len &&
               (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
            log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
            buf_offset = offset_vector[1];
            if (offset_vector[0] == offset_vector[1]) {
                /* An empty match ends where it starts. Without this step the
                 * next pcre_exec() would start at the same offset and report
                 * the same empty match again, until max_matches_per_file.
                 */
                ++buf_offset;
                log_debug("Regex match is of length zero. Advancing offset one byte.");
            }

            /* TODO: copy-pasted from above. FIXME */
            if ((size_t)matches_len + matches_spare >= matches_size) {
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = offset_vector[0];
            matches[matches_len].end = offset_vector[1];
            matches_len++;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    }

    if (opts.invert_match) {
        matches_len = invert_matches(buf, buf_len, matches, matches_len);
    }

    if (opts.stats) {
        pthread_mutex_lock(&stats_mtx);
        stats.total_bytes += buf_len;
        stats.total_files++;
        stats.total_matches += matches_len;
        pthread_mutex_unlock(&stats_mtx);
    }

    if (matches_len > 0) {
        /* The binary check was skipped above when binary files are searched;
         * run it now, but only if the answer changes what gets printed.
         */
        if (binary == -1 && !opts.print_filename_only) {
            binary = is_binary((const void *)buf, buf_len);
        }
        pthread_mutex_lock(&print_mtx);
        if (opts.print_filename_only) {
            /* If the --files-without-matches or -L option is passed we should
             * not print a matching line. This option currently sets
             * opts.print_filename_only and opts.invert_match. Unfortunately
             * setting the latter has the side effect of making matches.len = 1
             * on a file-without-matches which is not desired behaviour. See
             * GitHub issue 206 for the consequences if this behaviour is not
             * checked. */
            if (!opts.invert_match || matches_len < 2) {
                print_path(dir_full_path, '\n');
            }
        } else if (binary) {
            print_binary_file_matches(dir_full_path);
        } else {
            print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
        }
        pthread_mutex_unlock(&print_mtx);
    } else {
        log_debug("No match in %s", dir_full_path);
    }

    /* matches is NULL when nothing was ever allocated; free(NULL) is a no-op. */
    free(matches);
}