/*
 * Allocate a fresh, empty ignore set for the directory `dirname`.
 *
 * parent      - ignore set of the enclosing directory, or NULL.
 * dirname     - directory name; the pointer is stored, not copied.
 * dirname_len - length of `dirname` in bytes.
 *
 * Returns the newly allocated set. Its abs_path is a separately allocated
 * string built from the parent's abs_path (when non-empty) and `dirname`.
 */
ignores *init_ignore(ignores *parent, const char *dirname, const size_t dirname_len) {
    ignores *node = ag_malloc(sizeof(ignores));

    /* Start with no patterns of any kind. */
    node->names = NULL;
    node->names_len = 0;
    node->slash_names = NULL;
    node->slash_names_len = 0;
    node->regexes = NULL;
    node->regexes_len = 0;
    node->slash_regexes = NULL;
    node->slash_regexes_len = 0;

    node->dirname = dirname;
    node->dirname_len = dirname_len;

    /* Link past a parent that is_empty() reports as empty, as long as it
     * has a parent of its own to link to instead. */
    node->parent = parent;
    if (parent != NULL && is_empty(parent) && parent->parent != NULL) {
        node->parent = parent->parent;
    }

    if (parent != NULL && parent->abs_path_len > 0) {
        /* Nested directory: "<parent abs_path>/<dirname>". */
        ag_asprintf(&(node->abs_path), "%s/%s", parent->abs_path, dirname);
        node->abs_path_len = parent->abs_path_len + 1 + dirname_len;
        return node;
    }

    if (dirname_len == 1 && dirname[0] == '.') {
        /* "." is represented by an empty abs_path. */
        node->abs_path = ag_malloc(sizeof(char));
        node->abs_path[0] = '\0';
        node->abs_path_len = 0;
        return node;
    }

    ag_asprintf(&(node->abs_path), "%s", dirname);
    node->abs_path_len = dirname_len;
    return node;
}
/*
 * Build the regex "\.(ext1|ext2|...)$" from a NULL-terminated array of
 * extension strings.
 *
 * Returns a heap-allocated, NUL-terminated string owned by the caller.
 */
char *make_lang_regex(const char **extensions) {
    int capacity = 100;
    char *pattern = ag_malloc(capacity);
    int used = 3; /* length of the "\.(" prefix written below */
    int need_separator = 0;
    const char **cursor = extensions;

    strcpy(pattern, "\\.(");

    while (*cursor != NULL) {
        const char *ext = *cursor;
        int ext_len = strlen(ext);

        /* Reserve room for the extension, an optional '|', and the
         * trailing ")$\0" that is appended after the loop. */
        while (used + ext_len + 3 + need_separator > capacity) {
            capacity *= 2;
            pattern = ag_realloc(pattern, capacity);
        }

        if (need_separator) {
            pattern[used++] = '|';
        } else {
            need_separator = 1;
        }

        strcpy(pattern + used, ext);
        used += ext_len;
        ++cursor;
    }

    pattern[used++] = ')';
    pattern[used++] = '$';
    pattern[used++] = 0;
    return pattern;
}
/*
 * Build the regex "\.(ext1|ext2|...)$" from a packed extension table.
 *
 * ext_array - `num_exts` consecutive slots of SINGLE_EXT_LEN bytes, each
 *             read as a NUL-terminated extension string.
 * num_exts  - number of slots to use.
 *
 * Returns a heap-allocated, NUL-terminated string owned by the caller.
 */
char *make_lang_regex(char *ext_array, size_t num_exts) {
    int capacity = 100;
    char *pattern = ag_malloc(capacity);
    int used = 3; /* length of the "\.(" prefix written below */
    int need_separator = 0;
    size_t slot = 0;

    strcpy(pattern, "\\.(");

    while (slot < num_exts) {
        char *ext = ext_array + slot * SINGLE_EXT_LEN;
        int ext_len = strlen(ext);

        /* Reserve room for the extension, an optional '|', and the
         * trailing ")$\0" that is appended after the loop. */
        while (used + ext_len + 3 + need_separator > capacity) {
            capacity *= 2;
            pattern = ag_realloc(pattern, capacity);
        }

        if (need_separator) {
            pattern[used++] = '|';
        } else {
            need_separator = 1;
        }

        strcpy(pattern + used, ext);
        used += ext_len;
        ++slot;
    }

    pattern[used++] = ')';
    pattern[used++] = '$';
    pattern[used++] = 0;
    return pattern;
}
/*
 * Collect the extensions of the selected languages into one packed table.
 *
 * extension_index - indices into the global `langs` table.
 * len             - number of entries in `extension_index`.
 * exts            - out: receives a newly allocated, zero-filled table of
 *                   100 slots of SINGLE_EXT_LEN bytes each; the caller
 *                   owns it.
 *
 * Returns the number of slots that were filled (at most 100).
 *
 * Each slot always stays NUL-terminated: an extension that does not fit is
 * truncated to SINGLE_EXT_LEN - 1 bytes instead of spilling into the next
 * slot. A language whose extension list is empty contributes nothing.
 */
size_t combine_file_extensions(size_t *extension_index, size_t len, char **exts) {
    /* Keep it fixed as 100 for the reason that if you have more than 100
     * file types to search, you'd better search all the files.
     * */
    const size_t ext_capacity = 100;
    size_t num_of_extensions = 0;
    size_t i;

    (*exts) = (char *)ag_malloc(ext_capacity * SINGLE_EXT_LEN);
    memset((*exts), 0, ext_capacity * SINGLE_EXT_LEN);

    for (i = 0; i < len && num_of_extensions < ext_capacity; ++i) {
        size_t j;

        /* The extension list of each language ends at its first NULL. */
        for (j = 0; langs[extension_index[i]].extensions[j] != NULL; ++j) {
            const char *ext = langs[extension_index[i]].extensions[j];
            char *pos;
            size_t ext_len;

            if (num_of_extensions == ext_capacity) {
                break;
            }

            pos = (*exts) + num_of_extensions * SINGLE_EXT_LEN;
            ext_len = strlen(ext);
            if (ext_len >= (size_t)SINGLE_EXT_LEN) {
                /* Leave the last byte of the slot as the terminator. */
                ext_len = (size_t)SINGLE_EXT_LEN - 1;
            }
            /* The table was zero-filled, so no explicit NUL is needed. */
            memcpy(pos, ext, ext_len);
            ++num_of_extensions;
        }
    }

    return num_of_extensions;
}
/*
 * Record that the directory at `path` is being entered and report whether
 * it has been entered before.
 *
 * path   - directory to stat().
 * outkey - out: zeroed, then filled with the directory's device and inode.
 *
 * Returns SYMLOOP_OK when the directory was added to `symhash`,
 * SYMLOOP_LOOP when an entry with the same key is already there, and
 * SYMLOOP_ERROR when stat() fails. On _WIN32 builds the function does
 * nothing and always returns SYMLOOP_OK.
 */
static int check_symloop_enter(const char *path, dirkey_t *outkey) {
#ifdef _WIN32
    return SYMLOOP_OK;
#else
    struct stat st;
    symdir_t *existing = NULL;
    symdir_t *entry = NULL;

    /* Clear the whole key so that all sizeof(dirkey_t) bytes used for
     * hashing and comparison below have a defined value. */
    memset(outkey, 0, sizeof(dirkey_t));
    outkey->dev = 0;
    outkey->ino = 0;

    if (stat(path, &st) != 0) {
        log_err("Error stat()ing: %s", path);
        return SYMLOOP_ERROR;
    }

    outkey->dev = st.st_dev;
    outkey->ino = st.st_ino;

    HASH_FIND(hh, symhash, outkey, sizeof(dirkey_t), existing);
    if (existing != NULL) {
        return SYMLOOP_LOOP;
    }

    entry = (symdir_t *)ag_malloc(sizeof(symdir_t));
    memcpy(&entry->key, outkey, sizeof(dirkey_t));
    HASH_ADD(hh, symhash, key, sizeof(dirkey_t), entry);
    return SYMLOOP_OK;
#endif
}
/*
 * Allocate an empty ignore set linked to `parent` (which may be NULL).
 * Returns the new set; the caller owns it.
 */
ignores *init_ignore(ignores *parent) {
    ignores *node = ag_malloc(sizeof(ignores));

    node->parent = parent;

    /* No literal names and no regexes yet. */
    node->names_len = 0;
    node->names = NULL;
    node->regexes_len = 0;
    node->regexes = NULL;

    return node;
}
/* BTW, in case you're wondering, this returns void* so that * it can be used as a pthread... */ static void *default_start_server(void *arg) { server_t self = arg; connection_t con; int i; if (!self) return NULL; /* * TODO. * This shouldn't be done here. It doesn't really matter * because it is basically a no-op right now anyway. * A single process can have several servers running * in it, conceivably. * * Maybe it should be moved to ag.c and left up to main. */ become_daemon(self); /* configure the wait_queue's max size */ self->wq->max = self->maxcon; /* this is the connection handler: */ self->handlers = ag_malloc(sizeof(handler_rec) * self->numthreads); for (i = 0; i < self->numthreads; i++) { handler_t hd; hd = self->newhandler(); hd->svr = self; self->handlers[i].hd = hd; (void) pthread_create(&self->handlers[i].tid, /* thread id */ NULL, /* attributes */ hd->start, /* function to run */ (void *)hd /* arg to pass to function */ ); } /* * must do this before changing uids, because we may want to bind * to a port < 1024 and then become a non-root user. * * TODO * At this point we should also open up the logfile. */ self->listen_sk = setup_connection(self); /* only change users if self->uid is non-zero (non-root) */ if (self->uid) become_user(self); for (;;) { con = accept_connection(self->listen_sk); if (!self->wq->add_con(self->wq, con)) con->destroy(con); } }
/*
 * Allocate a queue object of `queue_size` bytes and install the default
 * push, pop and destroy operations. Returns the new queue.
 */
queue_t new_queue() {
    queue_t fresh = ag_malloc(queue_size);

    fresh->destroy = default_destroy_q;
    fresh->pop = default_pop;
    fresh->push = default_push;

    return fresh;
}
/*
 * Duplicate at most `size` bytes of `s` into a newly allocated,
 * NUL-terminated string.
 *
 * With HAVE_STRNDUP the work is delegated to strndup() and the result is
 * passed through CHECK_AND_RETURN; otherwise a buffer of size + 1 bytes is
 * allocated with ag_malloc() and filled with strlcpy().
 */
char *ag_strndup(const char *s, size_t size) {
    char *dup = NULL;

#ifdef HAVE_STRNDUP
    dup = strndup(s, size);
    CHECK_AND_RETURN(dup)
#else
    /* One extra byte for the terminator strlcpy() writes. */
    dup = (char *)ag_malloc(size + 1);
    strlcpy(dup, s, size + 1);
    return dup;
#endif
}
/*
 * Initialise a server object with default operations.
 *
 * svr - storage to initialise, or NULL to have `svr_size` bytes allocated.
 *
 * Returns the initialised server. `newhandler` is left NULL and the
 * name, logfile and pidfile strings are set to "".
 */
server_t new_server(server_t svr) {
    if (svr == NULL) {
        svr = ag_malloc(svr_size);
    }

    svr->wq = new_wait_queue();

    /* Default operations. */
    svr->destroy = default_destroy_svr;
    svr->summary = default_summary;
    svr->start = default_start_server;
    svr->newhandler = NULL;

    /* Empty strings rather than NULL for the textual settings. */
    svr->name = "";
    svr->logfile = "";
    svr->pidfile = "";

    return svr;
}
/*
 * Build the per-position skip table for the search string `find`.
 *
 * find           - the search string.
 * f_len          - its length in bytes.
 * skip_lookup    - out: receives a newly allocated array of f_len entries.
 * case_sensitive - forwarded to is_prefix() and suffix_len().
 *
 * NOTE(review): the two passes look like the usual Boyer-Moore
 * "good suffix" table construction - confirm against is_prefix() and
 * suffix_len(), which are not visible here.
 */
void generate_find_skip(const char *find, const size_t f_len, size_t **skip_lookup, const int case_sensitive) {
    size_t *table = (size_t *)ag_malloc(f_len * sizeof(size_t));
    size_t nearest_prefix = f_len;
    size_t pos;

    *skip_lookup = table;

    /* Pass 1: walk right to left, remembering the most recent position
     * that is_prefix() accepted. */
    for (pos = f_len; pos > 0; pos--) {
        if (is_prefix(find, f_len, pos, case_sensitive)) {
            nearest_prefix = pos;
        }
        table[pos - 1] = nearest_prefix + (f_len - pos);
    }

    /* Pass 2: refine entries using the suffix length found at each
     * position. */
    for (pos = 0; pos < f_len; pos++) {
        const size_t matched = suffix_len(find, f_len, pos, case_sensitive);
        const size_t slot = f_len - 1 - matched;

        if (find[pos - matched] != find[slot]) {
            table[slot] = f_len - 1 - pos + matched;
        }
    }
}
/*
 * Accept one connection on the listening socket `lsock`.
 *
 * Returns a new connection_t whose `sk` is the descriptor returned by
 * accept() and whose `name` is a heap-allocated struct sockaddr_in holding
 * the peer address. A failing accept() is reported on stderr, but a
 * connection object (carrying the failed descriptor) is still returned,
 * because callers use the result without a NULL check.
 *
 * Exits the process if accept() reports an address longer than the buffer
 * it was given.
 */
static connection_t accept_connection(int lsock) {
    int sock;
    struct sockaddr_in *name;
    /* accept() reads this as the buffer size and overwrites it with the
     * actual address length, so it must be initialised before the call. */
    socklen_t len = sizeof(struct sockaddr_in);
    connection_t con;

    name = ag_malloc(sizeof(struct sockaddr_in));

    sock = accept(lsock, (struct sockaddr *)name, &len);
    if (sock < 0) {
        /* Only negative values are errors; 0 is a valid descriptor. */
        fprintf(stderr, "accept_connection: error\n");
    }

    if (len > sizeof(struct sockaddr_in)) {
        fprintf(stderr, "accept_connection: memory corruption.\n");
        exit(1);
    }

    con = new_connection();
    con->sk = sock;
    con->name = name;
    return con;
}
void search_buf(char *buf, size_t buf_len, const char *dir_full_path, char *tmp_file_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ size_t buf_offset = 0; //if (opts.search_stream) { // binary = 0; //} //else if (!opts.search_binary_files) { // binary = is_binary(buf, buf_len); // if (binary) { // log_debug("File %s is binary. Skipping...", dir_full_path); // if (!convert_to_text(&buf, &buf_len, dir_full_path, tmp_file_path)) // return; // } //} binary = is_binary(buf, buf_len); if (binary) { log_debug("File %s is binary. Convert to text...", dir_full_path); if (!convert_to_text(&buf, &buf_len, dir_full_path, tmp_file_path)) return; } size_t matches_len = 0; match_t *matches; size_t matches_size; size_t matches_spare; //if (opts.invert_match) { // /* If we are going to invert the set of matches at the end, we will need // * one extra match struct, even if there are no matches at all. So make // * sure we have a nonempty array; and make sure we always have spare // * capacity for one extra. 
// */ // matches_size = 100; // matches = (match_t *)ag_malloc(matches_size * sizeof(match_t)); // matches_spare = 1; //} //else { // matches_size = 0; // matches = NULL; // matches_spare = 0; //} matches_size = 0; matches = NULL; matches_spare = 0; if (/*!opts.literal && */opts.query_len == 1 && opts.query[0] == '.') { matches_size = 1; matches = (match_t *)ag_malloc(matches_size * sizeof(match_t)); matches[0].start = 0; matches[0].end = buf_len; matches_len = 1; } else { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts.casing); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, alpha_skip_lookup, find_skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if (matches_len + matches_spare >= matches_size) { /* TODO: benchmark initial size of matches. 100 may be too small/big */ matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = (match_t *)ag_realloc(matches, matches_size * sizeof(match_t)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. 
Skipping the rest of this file.", dir_full_path); break; } } } else {
void search_buf(const char *buf, const size_t buf_len, const char *dir_full_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ size_t buf_offset = 0; if (opts.search_stream) { binary = 0; } else if (!opts.search_binary_files) { binary = is_binary((const void *)buf, buf_len); if (binary) { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } int matches_len = 0; match *matches; size_t matches_size; size_t matches_spare; if (opts.invert_match) { /* If we are going to invert the set of matches at the end, we will need * one extra match struct, even if there are no matches at all. So make * sure we have a nonempty array; and make sure we always have spare * capacity for one extra. */ matches_size = 100; matches = ag_malloc(matches_size * sizeof(match)); matches_spare = 1; } else { matches_size = 0; matches = NULL; matches_spare = 0; } if (opts.query_len == 1 && opts.query[0] == '.') { matches_size = 1; matches = ag_malloc(matches_size * sizeof(match)); matches[0].start = 0; matches[0].end = buf_len; matches_len = 1; } else if (opts.literal) { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts.casing); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if ((size_t)matches_len + matches_spare >= matches_size) { /* TODO: benchmark initial size of matches. 100 may be too small/big */ matches_size = matches ? 
matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { int offset_vector[3]; while (buf_offset < buf_len && (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; /* TODO: copy-pasted from above. FIXME */ if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. 
Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(buf, buf_len, matches, matches_len); } if (opts.stats) { pthread_mutex_lock(&stats_mtx); stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; pthread_mutex_unlock(&stats_mtx); } if (matches_len > 0) { if (binary == -1 && !opts.print_filename_only) { binary = is_binary((const void *)buf, buf_len); } pthread_mutex_lock(&print_mtx); if (opts.print_filename_only) { /* If the --files-without-matches or -L option in passed we should * not print a matching line. This option currently sets * opts.print_filename_only and opts.invert_match. Unfortunately * setting the latter has the side effect of making matches.len = 1 * on a file-without-matches which is not desired behaviour. See * GitHub issue 206 for the consequences if this behaviour is not * checked. */ if (!opts.invert_match || matches_len < 2) { print_path(dir_full_path, '\n'); } } else if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } pthread_mutex_unlock(&print_mtx); } else { log_debug("No match in %s", dir_full_path); } if (matches_size > 0) { free(matches); } }
/*
 * Load ignore patterns from the svn property file below `path`.
 *
 * ig   - ignore set that receives the patterns via add_ignore_pattern().
 * path - directory that contains SVN_DIR_PROP_BASE.
 *
 * The file is read as a sequence of "K <len>\n<key>\nV <len>\n<value>\n"
 * records. The value of the first record whose key matches SVN_PROP_IGNORE
 * is split on newlines and every non-empty line is added as a pattern.
 * A missing or unparsable file is only reported through log_debug().
 */
void load_svn_ignore_patterns(ignores *ig, const char *path) {
    FILE *fp = NULL;
    char *dir_prop_base;
    ag_asprintf(&dir_prop_base, "%s/%s", path, SVN_DIR_PROP_BASE);

    fp = fopen(dir_prop_base, "r");
    if (fp == NULL) {
        log_debug("Skipping svn ignore file %s", dir_prop_base);
        free(dir_prop_base);
        return;
    }

    char *entry = NULL;
    size_t entry_len = 0;
    char *key = ag_malloc(32); /* Sane start for max key length. */
    size_t key_len = 0;
    size_t bytes_read = 0;
    char *entry_line;
    size_t line_len;
    int matches;

    while (fscanf(fp, "K %zu\n", &key_len) == 1) {
        key = ag_realloc(key, key_len + 1);
        bytes_read = fread(key, 1, key_len, fp);
        /* Terminate at what was actually read, so a short read never
         * exposes uninitialised bytes to the "%s" below. */
        key[bytes_read] = '\0';

        matches = fscanf(fp, "\nV %zu\n", &entry_len);
        if (matches != 1) {
            log_debug("Unable to parse svnignore file %s: fscanf() got %i matches, expected 1.", dir_prop_base, matches);
            goto cleanup;
        }

        if (strncmp(SVN_PROP_IGNORE, key, bytes_read) != 0) {
            log_debug("key is %s, not %s. skipping %zu bytes", key, SVN_PROP_IGNORE, entry_len);
            /* Not the key we care about. fseek and repeat */
            fseek(fp, entry_len + 1, SEEK_CUR); /* +1 to account for newline. yes I know this is hacky */
            continue;
        }

        /* Found the ignore property: read its value. */
        entry = ag_malloc(entry_len + 1);
        bytes_read = fread(entry, 1, entry_len, fp);
        entry[bytes_read] = '\0';
        log_debug("entry: %s", entry);
        break;
    }

    if (entry == NULL) {
        goto cleanup;
    }

    /* Walk the value line by line. `end` is the first NUL, which lies
     * inside the allocation, so the cursor is never dereferenced or
     * advanced past the buffer - not even when the last line has no
     * trailing newline. */
    {
        const char *end = entry + strlen(entry);
        char *patterns = entry;

        while (patterns < end) {
            const char *newline = memchr(patterns, '\n', (size_t)(end - patterns));

            line_len = newline ? (size_t)(newline - patterns) : (size_t)(end - patterns);
            if (line_len > 0) {
                entry_line = ag_strndup(patterns, line_len);
                add_ignore_pattern(ig, entry_line);
                free(entry_line);
            }
            if (newline == NULL) {
                break;
            }
            patterns += line_len + 1;
        }
    }

    free(entry);

cleanup:
    free(dir_prop_base);
    free(key);
    fclose(fp);
}
void search_buf(const char *buf, const int buf_len, const char *dir_full_path) { int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */ int buf_offset = 0; if (opts.search_stream) { binary = 0; } else if (!opts.search_binary_files) { binary = is_binary((void*) buf, buf_len); if (binary) { log_debug("File %s is binary. Skipping...", dir_full_path); return; } } int matches_len = 0; match *matches; size_t matches_size; size_t matches_spare; if (opts.invert_match) { /* If we are going to invert the set of matches at the end, we will need * one extra match struct, even if there are no matches at all. So make * sure we have a nonempty array; and make sure we always have spare * capacity for one extra. */ matches_size = 100; matches = ag_malloc(matches_size * sizeof(match)); matches_spare = 1; } else { matches_size = 0; matches = NULL; matches_spare = 0; } if (opts.literal) { const char *match_ptr = buf; strncmp_fp ag_strnstr_fp = get_strstr(opts); while (buf_offset < buf_len) { match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup); if (match_ptr == NULL) { break; } if (opts.word_regexp) { const char *start = match_ptr; const char *end = match_ptr + opts.query_len; /* Check whether both start and end of the match lie on a word * boundary */ if ((start == buf || is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) && (end == buf + buf_len || is_wordchar(*end) != opts.literal_ends_wordchar)) { /* It's a match */ } else { /* It's not a match */ match_ptr += opts.query_len; buf_offset = end - buf; continue; } } if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. 
Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = match_ptr - buf; matches[matches_len].end = matches[matches_len].start + opts.query_len; buf_offset = matches[matches_len].end; log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start); matches_len++; match_ptr += opts.query_len; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path); break; } } } else { int rc; int offset_vector[3]; while (buf_offset < buf_len && (rc = pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) { log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]); buf_offset = offset_vector[1]; /* TODO: copy-pasted from above. FIXME */ if ((size_t)matches_len + matches_spare >= matches_size) { matches_size = matches ? matches_size * 2 : 100; log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size); matches = ag_realloc(matches, matches_size * sizeof(match)); } matches[matches_len].start = offset_vector[0]; matches[matches_len].end = offset_vector[1]; matches_len++; if (matches_len >= opts.max_matches_per_file) { log_err("Too many matches in %s. 
Skipping the rest of this file.", dir_full_path); break; } } } if (opts.invert_match) { matches_len = invert_matches(matches, matches_len, buf_len); } if (opts.stats) { pthread_mutex_lock(&stats_mtx); stats.total_bytes += buf_len; stats.total_files++; stats.total_matches += matches_len; pthread_mutex_unlock(&stats_mtx); } if (matches_len > 0) { if (binary == -1 && !opts.print_filename_only) { binary = is_binary((void*) buf, buf_len); } pthread_mutex_lock(&print_mtx); if (opts.print_filename_only) { print_path(dir_full_path, '\n'); } else if (binary) { print_binary_file_matches(dir_full_path); } else { print_file_matches(dir_full_path, buf, buf_len, matches, matches_len); } pthread_mutex_unlock(&print_mtx); } else { log_debug("No match in %s", dir_full_path); } if (matches_size > 0) { free(matches); } }
/* TODO: Append matches to some data structure instead of just printing them out.
 * Then ag can have sweet summaries of matches/files scanned/time/etc.
 */
/*
 * Search `path`: load its ignore files, then queue every regular file for
 * searching and recurse into sub-directories.
 *
 * ig        - ignore set for this directory; patterns found here are
 *             added to it.
 * base_path - passed through to the scandir filter and to recursive calls.
 * path      - directory to scan. If it turns out not to be a directory
 *             (ENOTDIR) it is searched directly with search_file().
 * depth     - current recursion depth; sub-directories are only entered
 *             while it is below opts.max_search_depth.
 */
void search_dir(ignores *ig, const char *base_path, const char *path, const int depth) {
    struct dirent **dir_list = NULL;
    struct dirent *dir = NULL;
    scandir_baton_t scandir_baton;
    int results = 0;

    char *dir_full_path = NULL;
    const char *ignore_file = NULL;
    int i;

    /* find agignore/gitignore/hgignore/etc files to load ignore patterns from.
     * With opts.skip_vcs_ignores only the first entry of the list is used. */
    for (i = 0; opts.skip_vcs_ignores ? (i == 0) : (ignore_pattern_files[i] != NULL); i++) {
        ignore_file = ignore_pattern_files[i];
        ag_asprintf(&dir_full_path, "%s/%s", path, ignore_file);
        if (strcmp(SVN_DIR, ignore_file) == 0) {
            load_svn_ignore_patterns(ig, dir_full_path);
        } else {
            load_ignore_patterns(ig, dir_full_path);
        }
        free(dir_full_path);
        dir_full_path = NULL;
    }

    if (opts.path_to_agignore) {
        load_ignore_patterns(ig, opts.path_to_agignore);
    }

    scandir_baton.ig = ig;
    scandir_baton.base_path = base_path;
    results = ag_scandir(path, &dir_list, &filename_filter, &scandir_baton);
    if (results == 0) {
        log_debug("No results found in directory %s", path);
        goto search_dir_cleanup;
    } else if (results == -1) {
        if (errno == ENOTDIR) {
            /* Not a directory. Probably a file. */
            /* If we're only searching one file, don't print the filename header at the top. */
            if (depth == 0 && opts.paths_len == 1) {
                opts.print_heading = -1;
            }
            search_file(path);
        } else {
            log_err("Error opening directory %s: %s", path, strerror(errno));
        }
        goto search_dir_cleanup;
    }

    int offset_vector[3];
    int rc = 0;
    work_queue_t *queue_item;

    for (i = 0; i < results; i++) {
        queue_item = NULL;
        dir = dir_list[i];
        ag_asprintf(&dir_full_path, "%s/%s", path, dir->d_name);

        /* If a link points to a directory then we need to treat it as a directory. */
        if (!opts.follow_symlinks && is_symlink(path, dir)) {
            log_debug("File %s ignored becaused it's a symlink", dir->d_name);
            goto cleanup;
        }

        if (!is_directory(path, dir)) {
            if (opts.file_search_regex) {
                rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
                               0, 0, offset_vector, 3);
                if (rc < 0) { /* no match */
                    log_debug("Skipping %s due to file_search_regex.", dir_full_path);
                    goto cleanup;
                } else if (opts.match_files) {
                    log_debug("match_files: file_search_regex matched for %s.", dir_full_path);
                    pthread_mutex_lock(&print_mtx);
                    print_path(dir_full_path, '\n');
                    pthread_mutex_unlock(&print_mtx);
                    goto cleanup;
                }
            }

            queue_item = ag_malloc(sizeof(work_queue_t));
            queue_item->path = dir_full_path;
            queue_item->next = NULL;

            /* Log before publishing. Once the item is on the queue this
             * function no longer owns the path (the cleanup below
             * deliberately does not free it), so it must not be read
             * afterwards. */
            log_debug("%s added to work queue", dir_full_path);

            pthread_mutex_lock(&work_queue_mtx);
            if (work_queue_tail == NULL) {
                work_queue = queue_item;
            } else {
                work_queue_tail->next = queue_item;
            }
            work_queue_tail = queue_item;
            pthread_mutex_unlock(&work_queue_mtx);
            pthread_cond_signal(&files_ready);
        } else if (opts.recurse_dirs) {
            if (depth < opts.max_search_depth) {
                log_debug("Searching dir %s", dir_full_path);
                ignores *child_ig = init_ignore(ig);
                search_dir(child_ig, base_path, dir_full_path, depth + 1);
                cleanup_ignore(child_ig);
            } else {
                log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
            }
        }

    cleanup:;
        free(dir);
        dir = NULL;
        /* The path is freed here only when it was not handed to the queue. */
        if (queue_item == NULL) {
            free(dir_full_path);
            dir_full_path = NULL;
        }
    }

search_dir_cleanup:;
    free(dir_list);
    dir_list = NULL;
}
void search_file(const char *file_full_path, int search_zip_files) { int fd; off_t f_len = 0; char *buf = NULL; struct stat statbuf; int rv = 0; FILE *pipe = NULL; char* tmp_file_path = NULL; fd = open(file_full_path, O_RDONLY); if (fd < 0) { /* XXXX: strerror is not thread-safe */ log_err("Skipping %s: Error opening file: %s", file_full_path, strerror(errno)); goto cleanup; } rv = fstat(fd, &statbuf); if (rv != 0) { log_err("Skipping %s: Error fstat()ing file.", file_full_path); goto cleanup; } //if (opts.stdout_inode != 0 && opts.stdout_inode == statbuf.st_ino) { // log_debug("Skipping %s: stdout is redirected to it", file_full_path); // goto cleanup; //} if ((statbuf.st_mode & S_IFMT) == 0) { log_err("Skipping %s: Mode %u is not a file.", file_full_path, statbuf.st_mode); goto cleanup; } if (statbuf.st_mode & S_IFIFO) { log_debug("%s is a named pipe. stream searching", file_full_path); pipe = fdopen(fd, "r"); //search_stream(pipe, file_full_path); fclose(pipe); goto cleanup; } f_len = statbuf.st_size; if (f_len == 0) { log_debug("Skipping %s: file is empty.", file_full_path); goto cleanup; } if (/*!opts.literal && */f_len > INT_MAX) { log_err("Skipping %s: pcre_exec() can't handle files larger than %i bytes.", file_full_path, INT_MAX); goto cleanup; } #ifdef _WIN32 { HANDLE hmmap = CreateFileMapping( (HANDLE)_get_osfhandle(fd), 0, PAGE_READONLY, 0, f_len, NULL); buf = (char *)MapViewOfFile(hmmap, FILE_SHARE_READ, 0, 0, f_len); if (hmmap != NULL) CloseHandle(hmmap); } if (buf == NULL) { FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(), 0, (LPSTR)&buf, 0, NULL); log_err("File %s failed to load: %s.", file_full_path, buf); LocalFree((void *)buf); goto cleanup; } #else buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { log_err("File %s failed to load: %s.", file_full_path, strerror(errno)); goto cleanup; } #if HAVE_MADVISE madvise(buf, f_len, MADV_SEQUENTIAL); 
#elif HAVE_POSIX_FADVISE posix_fadvise(fd, 0, f_len, POSIX_MADV_SEQUENTIAL); #endif #endif tmp_file_path = (char *)ag_malloc(MAX_PATH); if (search_zip_files) { ag_compression_type zip_type = is_zipped(buf, f_len); if (zip_type != AG_NO_COMPRESSION) { int _buf_len = (int)f_len; char *_buf = (char*)decompress(zip_type, buf, f_len, file_full_path, &_buf_len); if (_buf == NULL || _buf_len == 0) { log_err("Cannot decompress zipped file %s", file_full_path); goto cleanup; } search_buf(_buf, _buf_len, file_full_path, tmp_file_path); free(_buf); goto cleanup; } } search_buf(buf, f_len, file_full_path, tmp_file_path); cleanup: if (buf != NULL) { #ifdef _WIN32 UnmapViewOfFile(buf); #else munmap(buf, f_len); #endif } if (fd != -1) { close(fd); } if (tmp_file_path != NULL) { DeleteFileA(tmp_file_path); free(tmp_file_path); } }
/* This function is REALLY HOT. It gets called for every file */ int filename_filter(const char *path, const struct dirent *dir, void *baton) { const char *filename = dir->d_name; size_t filename_len = strlen(filename); size_t i; scandir_baton_t *scandir_baton = (scandir_baton_t*) baton; const ignores *ig = scandir_baton->ig; const char *base_path = scandir_baton->base_path; size_t base_path_len = strlen(base_path); const char *path_start = path; char *temp; if (!opts.follow_symlinks && is_symlink(path, dir)) { log_debug("File %s ignored becaused it's a symlink", dir->d_name); return 0; } if (is_named_pipe(path, dir)) { log_debug("%s ignored because it's a named pipe", path); return 0; } for (i = 0; evil_hardcoded_ignore_files[i] != NULL; i++) { if (strcmp(filename, evil_hardcoded_ignore_files[i]) == 0) { return 0; } } if (!opts.search_hidden_files && filename[0] == '.') { return 0; } if (opts.search_all_files && !opts.path_to_agignore) { return 1; } for (i = 0; base_path[i] == path[i] && i < base_path_len; i++) { /* base_path always ends with "/\0" while path doesn't, so this is safe */ path_start = path + i + 2; } log_debug("path_start is %s", path_start); if (path_ignore_search(ig, path_start, filename, scandir_baton->level)) { return 0; } if (is_directory(path, dir) && filename[filename_len - 1] != '/') { ag_asprintf(&temp, "%s/", filename); int rv = path_ignore_search(ig, path_start, temp, scandir_baton->level); free(temp); if (rv) { return 0; } } if (ig->parent != NULL) { scandir_baton_t *ignore_relay = ag_malloc(sizeof(scandir_baton_t)); memcpy(ignore_relay, scandir_baton, sizeof(scandir_baton_t)); ignore_relay->ig = ig->parent; ignore_relay->level++; int rv = filename_filter(path, dir, (void *)ignore_relay); free(ignore_relay); return rv; } return 1; }