void search_buf(const pcre *re, const pcre_extra *re_extra, const char *buf, const int buf_len, const char *dir_full_path) { int binary = 0; int buf_offset = 0; match matches[opts.max_matches_per_file]; int matches_len = 0; int offset_vector[opts.max_matches_per_file * 3]; /* TODO */ int rc = 0; if (is_binary((void*)buf, buf_len)) { /* Who needs duck typing when you have void cast? :) */ if (opts.search_binary_files) { binary = 1; } else { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } if (opts.literal) { const char *match_ptr = buf; char *(*ag_strncmp_fp)(const char*, const char*, const size_t, const size_t, const size_t[]) = &boyer_moore_strnstr; if (opts.casing == CASE_INSENSITIVE) { ag_strncmp_fp = &boyer_moore_strncasestr; } while (buf_offset < buf_len) { match_ptr = ag_strncmp_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; /* Don't segfault. TODO: realloc this array */ if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { /* In my profiling, most of the execution time is spent in this pcre_exec */ while (buf_offset < buf_len && (rc = pcre_exec(re, re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; /* Don't segfault. TODO: realloc this array */ if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(matches, matches_len, buf_len); } if (opts.stats) { stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; } if (matches_len > 0) { if (opts.print_filename_only) { print_path(dir_full_path, '\n'); } else { if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } } } else { log_debug("No match in %s", dir_full_path); } }
void search_buf(const char *buf, const int buf_len, const char *dir_full_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ int buf_offset = 0; if (opts.search_stream) { binary = 0; } else if (!opts.search_binary_files) { binary = is_binary((void*) buf, buf_len); if (binary) { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } int matches_len = 0; match *matches; size_t matches_size; size_t matches_spare; if (opts.invert_match) { /* If we are going to invert the set of matches at the end, we will need * one extra match struct, even if there are no matches at all. So make * sure we have a nonempty array; and make sure we always have spare * capacity for one extra. */ matches_size = 100; matches = ag_malloc(matches_size * sizeof(match)); matches_spare = 1; } else { matches_size = 0; matches = NULL; matches_spare = 0; } if (opts.literal) { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { int rc; int offset_vector[3]; while (buf_offset < buf_len && (rc = pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; /* TODO: copy-pasted from above. FIXME */ if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(matches, matches_len, buf_len); } if (opts.stats) { pthread_mutex_lock(&stats_mtx); stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; pthread_mutex_unlock(&stats_mtx); } if (matches_len > 0) { if (binary == -1 && !opts.print_filename_only) { binary = is_binary((void*) buf, buf_len); } pthread_mutex_lock(&print_mtx); if (opts.print_filename_only) { print_path(dir_full_path, '\n'); } else if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } pthread_mutex_unlock(&print_mtx); } else { log_debug("No match in %s", dir_full_path); } if (matches_size > 0) { free(matches); } }
void search_buf(const char *buf, const size_t buf_len, const char *dir_full_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ size_t buf_offset = 0; if (opts.search_stream) { binary = 0; } else if (!opts.search_binary_files) { binary = is_binary((const void *)buf, buf_len); if (binary) { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } int matches_len = 0; match *matches; size_t matches_size; size_t matches_spare; if (opts.invert_match) { /* If we are going to invert the set of matches at the end, we will need * one extra match struct, even if there are no matches at all. So make * sure we have a nonempty array; and make sure we always have spare * capacity for one extra. */ matches_size = 100; matches = ag_malloc(matches_size * sizeof(match)); matches_spare = 1; } else { matches_size = 0; matches = NULL; matches_spare = 0; } if (opts.query_len == 1 && opts.query[0] == '.') { matches_size = 1; matches = ag_malloc(matches_size * sizeof(match)); matches[0].start = 0; matches[0].end = buf_len; matches_len = 1; } else if (opts.literal) { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts.casing); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if ((size_t)matches_len + matches_spare >= matches_size) { /* TODO: benchmark initial size of matches. 100 may be too small/big */ matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { int offset_vector[3]; while (buf_offset < buf_len && (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; /* TODO: copy-pasted from above. FIXME */ if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(buf, buf_len, matches, matches_len); } if (opts.stats) { pthread_mutex_lock(&stats_mtx); stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; pthread_mutex_unlock(&stats_mtx); } if (matches_len > 0) { if (binary == -1 && !opts.print_filename_only) { binary = is_binary((const void *)buf, buf_len); } pthread_mutex_lock(&print_mtx); if (opts.print_filename_only) { /* If the --files-without-matches or -L option in passed we should * not print a matching line. This option currently sets * opts.print_filename_only and opts.invert_match. Unfortunately * setting the latter has the side effect of making matches.len = 1 * on a file-without-matches which is not desired behaviour. See * GitHub issue 206 for the consequences if this behaviour is not * checked. */ if (!opts.invert_match || matches_len < 2) { print_path(dir_full_path, '\n'); } } else if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } pthread_mutex_unlock(&print_mtx); } else { log_debug("No match in %s", dir_full_path); } if (matches_size > 0) { free(matches); } }