/*
 * Register a single ignore pattern on `ig`.
 *
 * Patterns that is_fnmatch() reports as globs are appended to ig->regexes in
 * arrival order. Every other pattern is inserted into ig->names at the
 * position that keeps the array ordered by strcmp(). The text is copied with
 * ag_strdup(), so the caller keeps ownership of `pattern`.
 */
void add_ignore_pattern(ignores *ig, const char* pattern) {
    int slot;

    /* A leading "./" carries no information; drop it so that matches are
     * more likely. */
    if (pattern[0] == '.' && pattern[1] == '/') {
        pattern += 2;
    }

    if (!is_fnmatch(pattern)) {
        /* Literal name. A balanced tree would be faster, but a sorted array
         * with a linear insertion is what is used here. */
        ig->names_len++;
        ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char*));

        /* Shift larger-or-equal entries up by one until the slot is found. */
        slot = ig->names_len - 1;
        while (slot > 0 && strcmp(pattern, ig->names[slot - 1]) <= 0) {
            ig->names[slot] = ig->names[slot - 1];
            slot--;
        }
        ig->names[slot] = ag_strdup(pattern);
        log_debug("added literal ignore pattern %s", pattern);
        return;
    }

    /* Glob pattern: order does not matter, so just append. */
    ig->regexes_len++;
    ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char*));
    ig->regexes[ig->regexes_len - 1] = ag_strdup(pattern);
    log_debug("added regex ignore pattern %s", pattern);
}
/*
 * Build the regex "\.(ext1|ext2|...)$" from a packed table of extensions.
 *
 * ext_array is read as num_exts consecutive slots of SINGLE_EXT_LEN bytes,
 * each holding a NUL-terminated extension. The result is a NUL-terminated
 * string in a buffer obtained from ag_malloc()/ag_realloc(); presumably the
 * caller is expected to free it.
 */
char *make_lang_regex(char *ext_array, size_t num_exts) {
    int capacity = 100;
    int length = 3; /* length of the "\.(" prefix written below */
    char *out = ag_malloc(capacity);
    size_t idx;

    strcpy(out, "\\.(");

    for (idx = 0; idx < num_exts; ++idx) {
        char *ext = ext_array + idx * SINGLE_EXT_LEN;
        int ext_len = strlen(ext);
        /* Every extension except the first is preceded by '|'. */
        int need_bar = (idx > 0);

        /* Reserve room for the optional '|', the extension and the trailing
         * ")$" plus NUL (the constant 3). */
        while (length + ext_len + 3 + need_bar > capacity) {
            capacity *= 2;
            out = ag_realloc(out, capacity);
        }

        if (need_bar) {
            out[length++] = '|';
        }
        strcpy(out + length, ext);
        length += ext_len;
    }

    /* Close the group and anchor at end of string; strcpy adds the NUL. */
    strcpy(out + length, ")$");
    return out;
}
/*
 * Build the regex "\.(ext1|ext2|...)$" from a list of extensions.
 *
 * `extensions` is walked until a null entry is reached. The result is a
 * NUL-terminated string in a buffer obtained from ag_malloc()/ag_realloc();
 * presumably the caller is expected to free it.
 */
char *make_lang_regex(const char **extensions) {
    int capacity = 100;
    int length = 3; /* length of the "\.(" prefix written below */
    char *out = ag_malloc(capacity);
    const char **cur;

    strcpy(out, "\\.(");

    for (cur = extensions; *cur; ++cur) {
        int ext_len = strlen(*cur);
        /* Every extension except the first is preceded by '|'. */
        int need_bar = (cur != extensions);

        /* Reserve room for the optional '|', the extension and the trailing
         * ")$" plus NUL (the constant 3). */
        while (length + ext_len + 3 + need_bar > capacity) {
            capacity *= 2;
            out = ag_realloc(out, capacity);
        }

        if (need_bar) {
            out[length++] = '|';
        }
        strcpy(out + length, *cur);
        length += ext_len;
    }

    /* Close the group and anchor at end of string; strcpy adds the NUL. */
    strcpy(out + length, ")$");
    return out;
}
/*
 * Grow the match array when it has no free slot left.
 *
 * Nothing happens while matches_len is still below *matches_size. Otherwise
 * the capacity becomes 100 if no array exists yet, or twice the current
 * capacity, and *matches is reallocated through ag_realloc().
 */
void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len) {
    size_t grown;

    if (matches_len >= *matches_size) {
        /* TODO: benchmark initial size of matches. 100 may be too small/big */
        if (*matches) {
            grown = *matches_size * 2;
        } else {
            grown = 100;
        }
        *matches_size = grown;
        *matches = (match_t*) ag_realloc(*matches, grown * sizeof(match_t));
    }
}
/*
 * Register a single ignore pattern on `ig`.
 *
 * - Patterns starting with '/' are skipped entirely.
 * - A leading "./" and any trailing whitespace are removed.
 * - Patterns that is_fnmatch() reports as globs are appended to ig->regexes.
 * - All other patterns are inserted into ig->names, kept ordered by strcmp().
 *
 * The stored text is a fresh copy made with ag_strndup(); the caller keeps
 * ownership of `pattern`.
 */
void add_ignore_pattern(ignores *ig, const char* pattern) {
    int i;
    size_t pattern_len;
    char *stored;

    if ('/' == pattern[0]) {
        log_debug("Pattern begins with '/', skipping.");
        return;
    }

    /* Strip off the leading ./ so that matches are more likely. */
    if (strncmp(pattern, "./", 2) == 0) {
        pattern += 2;
    }

    /* Kill trailing whitespace. The cast matters: passing a negative char
     * value to isspace() is undefined behaviour. */
    pattern_len = strlen(pattern);
    while (pattern_len > 0 && isspace((unsigned char)pattern[pattern_len - 1])) {
        pattern_len--;
    }

    if (pattern_len == 0) {
        log_debug("Pattern is empty. Not adding any ignores.");
        return;
    }

    /* Work on the trimmed copy from here on, so that the sort position and
     * the log output agree with what is actually stored. */
    stored = ag_strndup(pattern, pattern_len);

    /* TODO: de-dupe these patterns */
    if (is_fnmatch(pattern)) {
        ig->regexes_len++;
        ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char*));
        ig->regexes[ig->regexes_len - 1] = stored;
        log_debug("added regex ignore pattern %s", stored);
    } else {
        /* a balanced binary tree is best for performance, but I'm lazy */
        ig->names_len++;
        ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char*));
        /* Insertion sort: shift larger-or-equal entries up by one slot. */
        for (i = ig->names_len - 1; i > 0; i--) {
            if (strcmp(stored, ig->names[i-1]) > 0) {
                break;
            }
            ig->names[i] = ig->names[i-1];
        }
        ig->names[i] = stored;
        log_debug("added literal ignore pattern %s", stored);
    }
}
/*
 * Register a single ignore pattern on `ig`.
 *
 * The pattern is normalised (leading '.' of "./" dropped, trailing whitespace
 * removed) and then filed into one of four arrays, each kept ordered by
 * strcmp():
 *
 *   glob,    starts with '/'  -> ig->slash_regexes
 *   glob,    otherwise        -> ig->regexes
 *   literal, starts with '/'  -> ig->slash_names
 *   literal, otherwise        -> ig->names
 *
 * For the '/' variants the leading slash itself is not stored. The stored
 * text is a fresh copy made with ag_strndup(); the caller keeps ownership of
 * `pattern`.
 */
void add_ignore_pattern(ignores *ig, const char *pattern) {
    int i;
    size_t pattern_len;

    /* Strip off the leading dot so that matches are more likely. */
    if (strncmp(pattern, "./", 2) == 0) {
        pattern++;
    }

    /* Kill trailing whitespace. The cast matters: passing a negative char
     * value to isspace() is undefined behaviour. */
    pattern_len = strlen(pattern);
    while (pattern_len > 0 && isspace((unsigned char)pattern[pattern_len - 1])) {
        pattern_len--;
    }

    if (pattern_len == 0) {
        log_debug("Pattern is empty. Not adding any ignores.");
        return;
    }

    char ***patterns_p;
    size_t *patterns_len;
    if (is_fnmatch(pattern)) {
        if (pattern[0] == '/') {
            patterns_p = &(ig->slash_regexes);
            patterns_len = &(ig->slash_regexes_len);
            pattern++;
            pattern_len--;
        } else {
            patterns_p = &(ig->regexes);
            patterns_len = &(ig->regexes_len);
        }
    } else {
        if (pattern[0] == '/') {
            patterns_p = &(ig->slash_names);
            patterns_len = &(ig->slash_names_len);
            pattern++;
            pattern_len--;
        } else {
            patterns_p = &(ig->names);
            patterns_len = &(ig->names_len);
        }
    }
    /* NOTE(review): a pattern consisting only of "/" reaches this point with
     * pattern_len == 0 and is stored as an empty string - confirm whether
     * that is intended. */

    /* Use the trimmed copy for ordering and logging so that both agree with
     * what is actually stored. */
    char *stored = ag_strndup(pattern, pattern_len);

    ++*patterns_len;

    char **patterns;

    /* a balanced binary tree is best for performance, but I'm lazy */
    *patterns_p = patterns = ag_realloc(*patterns_p, (*patterns_len) * sizeof(char *));
    /* TODO: de-dupe these patterns */
    /* Insertion sort: shift larger-or-equal entries up by one slot. */
    for (i = *patterns_len - 1; i > 0; i--) {
        if (strcmp(stored, patterns[i - 1]) > 0) {
            break;
        }
        patterns[i] = patterns[i - 1];
    }
    patterns[i] = stored;
    log_debug("added ignore pattern %s to %s", stored,
              ig == root_ignores ? "root ignores" : ig->abs_path);
}
/*
 * Load svn:ignore patterns for the directory `path` into `ig`.
 *
 * Opens "<path>/<SVN_DIR_PROP_BASE>" and reads records of the form
 *
 *     K <key length>\n<key>\nV <value length>\n<value>\n
 *
 * until the key SVN_PROP_IGNORE is found. The value of that record is split
 * on '\n' and every non-empty line is passed to add_ignore_pattern().
 * If the file cannot be opened or parsed, the function just logs and returns.
 */
void load_svn_ignore_patterns(ignores *ig, const char *path) {
    FILE *fp = NULL;
    char *dir_prop_base;
    ag_asprintf(&dir_prop_base, "%s/%s", path, SVN_DIR_PROP_BASE);

    fp = fopen(dir_prop_base, "r");
    if (fp == NULL) {
        log_debug("Skipping svn ignore file %s", dir_prop_base);
        free(dir_prop_base);
        return;
    }

    char *entry = NULL;
    size_t entry_len = 0;
    char *key = ag_malloc(32); /* Sane start for max key length. */
    size_t key_len = 0;
    size_t bytes_read = 0;
    int matches;

    while (fscanf(fp, "K %zu\n", &key_len) == 1) {
        key = ag_realloc(key, key_len + 1);
        bytes_read = fread(key, 1, key_len, fp);
        /* Terminate at what was actually read, so a short read never leaves
         * uninitialised bytes inside the string. */
        key[bytes_read] = '\0';
        matches = fscanf(fp, "\nV %zu\n", &entry_len);
        if (matches != 1) {
            log_debug("Unable to parse svnignore file %s: fscanf() got %i matches, expected 1.", dir_prop_base, matches);
            goto cleanup;
        }

        /* Exact comparison: a length-limited compare would also accept any
         * key that is merely a prefix of SVN_PROP_IGNORE (or an empty key). */
        if (strcmp(SVN_PROP_IGNORE, key) != 0) {
            log_debug("key is %s, not %s. skipping %zu bytes", key, SVN_PROP_IGNORE, entry_len);
            /* Not the key we care about. fseek and repeat */
            fseek(fp, entry_len + 1, SEEK_CUR); /* +1 to account for newline. yes I know this is hacky */
            continue;
        }

        /* Aww yeah. Time to ignore stuff */
        entry = ag_malloc(entry_len + 1);
        bytes_read = fread(entry, 1, entry_len, fp);
        entry[bytes_read] = '\0';
        log_debug("entry: %s", entry);
        break;
    }

    if (entry != NULL) {
        /* Walk the value line by line. All positions are checked against
         * entry_end before being dereferenced, so a final line without a
         * trailing newline cannot push the cursor past the buffer. */
        const char *line = entry;
        const char *entry_end = entry + strlen(entry);

        while (line < entry_end) {
            size_t line_len = 0;
            while (line + line_len < entry_end && line[line_len] != '\n') {
                line_len++;
            }

            if (line_len > 0) {
                char *entry_line = ag_strndup(line, line_len);
                add_ignore_pattern(ig, entry_line);
                free(entry_line);
            }

            line += line_len;
            if (line < entry_end) {
                line++; /* step over the '\n' */
            }
        }
    }

cleanup:
    free(entry); /* free(NULL) is a no-op */
    free(dir_prop_base);
    free(key);
    fclose(fp);
}
void search_buf(const char *buf, const int buf_len, const char *dir_full_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ int buf_offset = 0; if (opts.search_stream) { binary = 0; } else if (!opts.search_binary_files) { binary = is_binary((void*) buf, buf_len); if (binary) { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } int matches_len = 0; match *matches; size_t matches_size; size_t matches_spare; if (opts.invert_match) { /* If we are going to invert the set of matches at the end, we will need * one extra match struct, even if there are no matches at all. So make * sure we have a nonempty array; and make sure we always have spare * capacity for one extra. */ matches_size = 100; matches = ag_malloc(matches_size * sizeof(match)); matches_spare = 1; } else { matches_size = 0; matches = NULL; matches_spare = 0; } if (opts.literal) { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. 
Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { int rc; int offset_vector[3]; while (buf_offset < buf_len && (rc = pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; /* TODO: copy-pasted from above. FIXME */ if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. 
Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(matches, matches_len, buf_len); } if (opts.stats) { pthread_mutex_lock(&stats_mtx); stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; pthread_mutex_unlock(&stats_mtx); } if (matches_len > 0) { if (binary == -1 && !opts.print_filename_only) { binary = is_binary((void*) buf, buf_len); } pthread_mutex_lock(&print_mtx); if (opts.print_filename_only) { print_path(dir_full_path, '\n'); } else if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } pthread_mutex_unlock(&print_mtx); } else { log_debug("No match in %s", dir_full_path); } if (matches_size > 0) { free(matches); } }
void search_buf(const char *buf, const size_t buf_len, const char *dir_full_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ size_t buf_offset = 0; if (opts.search_stream) { binary = 0; } else if (!opts.search_binary_files) { binary = is_binary((const void *)buf, buf_len); if (binary) { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } int matches_len = 0; match *matches; size_t matches_size; size_t matches_spare; if (opts.invert_match) { /* If we are going to invert the set of matches at the end, we will need * one extra match struct, even if there are no matches at all. So make * sure we have a nonempty array; and make sure we always have spare * capacity for one extra. */ matches_size = 100; matches = ag_malloc(matches_size * sizeof(match)); matches_spare = 1; } else { matches_size = 0; matches = NULL; matches_spare = 0; } if (opts.query_len == 1 && opts.query[0] == '.') { matches_size = 1; matches = ag_malloc(matches_size * sizeof(match)); matches[0].start = 0; matches[0].end = buf_len; matches_len = 1; } else if (opts.literal) { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts.casing); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if ((size_t)matches_len + matches_spare >= matches_size) { /* TODO: benchmark initial size of matches. 100 may be too small/big */ matches_size = matches ? 
matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { int offset_vector[3]; while (buf_offset < buf_len && (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; /* TODO: copy-pasted from above. FIXME */ if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. 
Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(buf, buf_len, matches, matches_len); } if (opts.stats) { pthread_mutex_lock(&stats_mtx); stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; pthread_mutex_unlock(&stats_mtx); } if (matches_len > 0) { if (binary == -1 && !opts.print_filename_only) { binary = is_binary((const void *)buf, buf_len); } pthread_mutex_lock(&print_mtx); if (opts.print_filename_only) { /* If the --files-without-matches or -L option in passed we should * not print a matching line. This option currently sets * opts.print_filename_only and opts.invert_match. Unfortunately * setting the latter has the side effect of making matches.len = 1 * on a file-without-matches which is not desired behaviour. See * GitHub issue 206 for the consequences if this behaviour is not * checked. */ if (!opts.invert_match || matches_len < 2) { print_path(dir_full_path, '\n'); } } else if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } pthread_mutex_unlock(&print_mtx); } else { log_debug("No match in %s", dir_full_path); } if (matches_size > 0) { free(matches); } }
void search_buf(char *buf, size_t buf_len, const char *dir_full_path, char *tmp_file_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ size_t buf_offset = 0; //if (opts.search_stream) { // binary = 0; //} //else if (!opts.search_binary_files) { // binary = is_binary(buf, buf_len); // if (binary) { // log_debug("File %s is binary. Skipping...", dir_full_path); // if (!convert_to_text(&buf, &buf_len, dir_full_path, tmp_file_path)) // return; // } //} binary = is_binary(buf, buf_len); if (binary) { log_debug("File %s is binary. Convert to text...", dir_full_path); if (!convert_to_text(&buf, &buf_len, dir_full_path, tmp_file_path)) return; } size_t matches_len = 0; match_t *matches; size_t matches_size; size_t matches_spare; //if (opts.invert_match) { // /* If we are going to invert the set of matches at the end, we will need // * one extra match struct, even if there are no matches at all. So make // * sure we have a nonempty array; and make sure we always have spare // * capacity for one extra. 
// */ // matches_size = 100; // matches = (match_t *)ag_malloc(matches_size * sizeof(match_t)); // matches_spare = 1; //} //else { // matches_size = 0; // matches = NULL; // matches_spare = 0; //} matches_size = 0; matches = NULL; matches_spare = 0; if (/*!opts.literal && */opts.query_len == 1 && opts.query[0] == '.') { matches_size = 1; matches = (match_t *)ag_malloc(matches_size * sizeof(match_t)); matches[0].start = 0; matches[0].end = buf_len; matches_len = 1; } else { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts.casing); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, alpha_skip_lookup, find_skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if (matches_len + matches_spare >= matches_size) { /* TODO: benchmark initial size of matches. 100 may be too small/big */ matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = (match_t *)ag_realloc(matches, matches_size * sizeof(match_t)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. 
Skipping the rest of this file.", dir_full_path); break; } } } else {