void search_file(const char *file_full_path) { int fd = -1; off_t f_len = 0; char *buf = NULL; struct stat statbuf; int rv = 0; FILE *pipe = NULL; fd = open(file_full_path, O_RDONLY); if (fd < 0) { log_err("Error opening file %s: %s Skipping...", file_full_path, strerror(errno)); goto cleanup; } rv = fstat(fd, &statbuf); if (rv != 0) { log_err("Error fstat()ing file %s. Skipping...", file_full_path); goto cleanup; } if ((statbuf.st_mode & S_IFMT) == 0) { log_err("%s is not a file. Mode %u. Skipping...", file_full_path, statbuf.st_mode); goto cleanup; } if (statbuf.st_mode & S_IFIFO) { log_debug("%s is a named pipe. stream searching", file_full_path); pipe = fdopen(fd, "r"); search_stream(pipe, file_full_path); fclose(pipe); } else { f_len = statbuf.st_size; if (f_len == 0) { log_debug("File %s is empty, skipping.", file_full_path); goto cleanup; } buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { log_err("File %s failed to load: %s.", file_full_path, strerror(errno)); goto cleanup; } search_buf(buf, (int)f_len, file_full_path); } cleanup: ; if (fd != -1) { munmap(buf, f_len); close(fd); } }
/*
 * Open s->input_file and run search_stream() over it.
 *
 * The path is first checked with realpath(); the resolved name itself is not
 * used afterwards. audit_start()/audit_finish() bracket the search.
 *
 * Returns TRUE when the file could not be resolved, allocated for, or
 * opened, and FALSE after the stream has been searched.
 */
int process_file(f_state *s) {
    char temp[PATH_MAX];
    f_info *i = NULL;
    int saved_errno;

    /* Validate the path before allocating anything, so this early exit has
     * nothing to release. */
    if (realpath(s->input_file, temp) == NULL) {
        print_error(s, s->input_file, strerror(errno));
        return TRUE;
    }

    i = malloc(sizeof *i);
    if (i == NULL) {
        print_error(s, s->input_file, strerror(errno));
        return TRUE;
    }

    i->file_name = strdup(s->input_file);
    if (i->file_name == NULL) {
        print_error(s, s->input_file, strerror(errno));
        free(i);
        return TRUE;
    }
    i->is_stdin = FALSE;

    audit_start(s, i);

#if defined(__LINUX)
#ifdef DEBUG
    printf("Using 64 bit fopen\n");
#endif
    i->handle = fopen64(i->file_name, "rb");
#elif defined(__WIN32)
    /* I would like to be able to read from physical devices in Windows,
     * have played with different options to fopen and the dd src says you
     * need write access on WinXP but nothing seems to work */
    i->handle = fopen(i->file_name, "rb");
#else
    i->handle = fopen(i->file_name, "rb");
#endif

    if (i->handle == NULL) {
        /* Capture errno once: the reporting calls below may overwrite it. */
        saved_errno = errno;
        print_error(s, s->input_file, strerror(saved_errno));
        audit_msg(s, "Error: %s", strerror(saved_errno));
        free(i->file_name);
        free(i);
        return TRUE;
    }

    i->total_bytes = find_file_size(i->handle);
    search_stream(s, i);
    audit_finish(s, i);

    fclose(i->handle);
    /* Release the duplicated name too, as process_stdin() does. */
    free(i->file_name);
    free(i);
    return FALSE;
}
/*
 * Run search_stream() over standard input.
 *
 * Sets s->input_file to "stdin" and builds a temporary f_info whose handle
 * is stdin and whose total_bytes is 0.
 *
 * Returns TRUE if the bookkeeping structure cannot be allocated, FALSE
 * after the stream has been searched.
 */
int process_stdin(f_state *s) {
    f_info *i = malloc(sizeof *i);

    if (i == NULL) {
        print_error(s, "stdin", strerror(errno));
        return TRUE;
    }

    i->file_name = strdup("stdin");
    if (i->file_name == NULL) {
        print_error(s, "stdin", strerror(errno));
        free(i);
        return TRUE;
    }

    s->input_file = "stdin";
    i->handle = stdin;
    i->is_stdin = TRUE;

    /* We can't compute the size of this stream, we just ignore it */
    i->total_bytes = 0;

    audit_start(s, i);
    search_stream(s, i);

    free(i->file_name);
    free(i);
    return FALSE;
}
int main(int argc, char **argv) { char **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; double time_diff; pthread_t *workers = NULL; int workers_len; set_log_level(LOG_LEVEL_WARN); work_queue = NULL; work_queue_tail = NULL; memset(&stats, 0, sizeof(stats)); root_ignores = init_ignore(NULL); out_fd = stdout; #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &paths); log_debug("PCRE Version: %s", pcre_version()); if (opts.workers) { workers_len = opts.workers; } else { /* Experiments show that two worker threads appear to be optimal, both * on dual-core and quad-core systems. See * http://geoff.greer.fm/2012/09/07/the-silver-searcher-adding-pthreads/. * On single-core CPUs, more than one worker thread makes no sense. */ int ncpus = (int)sysconf(_SC_NPROCESSORS_ONLN); workers_len = (ncpus >= 2) ? 2 : 1; } log_debug("Using %i workers", workers_len); done_adding_files = FALSE; workers = ag_calloc(workers_len, sizeof(pthread_t)); if (pthread_cond_init(&files_ready, NULL)) { log_err("pthread_cond_init failed!"); exit(2); } if (pthread_mutex_init(&print_mtx, NULL)) { log_err("pthread_mutex_init failed!"); exit(2); } if (pthread_mutex_init(&stats_mtx, NULL)) { log_err("pthread_mutex_init failed!"); exit(2); } if (pthread_mutex_init(&work_queue_mtx, NULL)) { log_err("pthread_mutex_init failed!"); exit(2); } if (opts.casing == CASE_SMART) { opts.casing = contains_uppercase(opts.query) ? 
CASE_SENSITIVE : CASE_INSENSITIVE; } if (opts.literal) { if (opts.casing == CASE_INSENSITIVE) { /* Search routine needs the query to be lowercase */ char *c = opts.query; for (; *c != '\0'; ++c) { *c = (char) tolower(*c); } } generate_skip_lookup(opts.query, opts.query_len, skip_lookup, opts.casing == CASE_SENSITIVE); if (opts.word_regexp) { init_wordchar_table(); opts.literal_starts_wordchar = is_wordchar(opts.query[0]); opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); } } else { if (opts.casing == CASE_INSENSITIVE) { pcre_opts = pcre_opts | PCRE_CASELESS; } if (opts.word_regexp) { char *word_regexp_query; asprintf(&word_regexp_query, "\\b%s\\b", opts.query); free(opts.query); opts.query = word_regexp_query; opts.query_len = strlen(opts.query); } compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); } if (opts.search_stream) { search_stream(stdin, ""); } else { for (i = 0; i < workers_len; i++) { int ptc_rc = pthread_create(&(workers[i]), NULL, &search_file_worker, NULL); check_err(ptc_rc, "create worker thread"); } for (i = 0; paths[i] != NULL; i++) { log_debug("searching path %s for %s", paths[i], opts.query); search_dir(root_ignores, paths[i], 0); } done_adding_files = TRUE; pthread_cond_broadcast(&files_ready); for (i = 0; i < workers_len; i++) { if (pthread_join(workers[i], NULL)) { log_err("pthread_join failed!"); exit(2); } } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff /= 1000000; printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } if (opts.pager) { pclose(out_fd); } pthread_cond_destroy(&files_ready); pthread_mutex_destroy(&work_queue_mtx); pthread_mutex_destroy(&stats_mtx); pthread_mutex_destroy(&print_mtx); cleanup_ignore(root_ignores); 
free(workers); free(paths); return 0; }
/*
 * Convenience wrapper: forwards the compiled pattern and its study data to
 * search_stream(), using standard input as the stream and an empty string
 * as the final argument.
 */
void search_stdin(const pcre *re, const pcre_extra *re_extra) {
    FILE *const input = stdin;

    search_stream(re, re_extra, input, "");
}
/*main function*/ int main(int argc, char *argv[]) { search *mysearch; /*ptr to a structure holding the search request */ remote *rmt; Client_para *para; pthread_t id; int err, rmt_flag; char *temp; time_type t_start, t_end; double tdiff; /*initialization*/ err = 0; rmt_flag = 0; mysearch = NULL; init_search(&mysearch); /* process all the command line switches */ opterr = 0; /* prevent getopt() from printing error messages */ scan_opt_search(argc, argv, mysearch); /*move the getopt to command_util.h to shorten the main */ /*build the shift table*/ build_shifttable(mysearch); /* * if there is no argument in list of files * directly go to stdin */ if (optind >= argc) { search_stream(stdin, NULL, mysearch, NULL); return 0; } get_time(&t_start); /* process the list of files*/ for (; optind < argc; optind++) { /* if it a remote search */ temp = argv[optind]; /* if it is remote search and has been successfully parsed*/ if ((rmt = scan_remote_search(temp, &rmt_flag)) != NULL) { if ((para = (Client_para*) malloc(sizeof(Client_para))) == NULL) { perror("malloc"); continue; } para->mysearch = mysearch; para->rmt = rmt; /* spawn a thread the perform remote search*/ // increment the stack count pthread_mutex_lock(&(mysearch->lock)); err = pthread_create(&id, NULL, client_agent, (void*) para); mysearch->stk_count++; if (err != 0) { fprintf(stderr, "Pthread_create of client\n"); free(para); mysearch->stk_count--; pthread_mutex_unlock(&(mysearch->lock)); continue; } pthread_mutex_unlock(&(mysearch->lock)); continue; } else if (rmt_flag == 1) { /* if a : used to appear there but has not been parsed, neglect it */ continue; } if (strcmp(temp, STREAM_REDIRECT) == 0) /* "-" redirect the io to stdin*/ { search_stream(stdin, NULL, mysearch, NULL); continue; } search_given(temp, mysearch); } /* wait all the thread to be done*/ while (mysearch->stk_count != 0) { pthread_cond_wait(&(mysearch->ready), &(mysearch->lock)); } get_time(&t_end); tdiff = time_diff(&t_start, &t_end); /* print the 
statistical result*/ print_stat(stdout, &(mysearch->statistics), tdiff); destroy_search(mysearch); return 0; } /* main */
void search_file(const char *file_full_path) { int fd = -1; off_t f_len = 0; char *buf = NULL; struct stat statbuf; int rv = 0; FILE *pipe = NULL; fd = open(file_full_path, O_RDONLY); if (fd < 0) { log_err("Error opening file %s: %s Skipping...", file_full_path, strerror(errno)); goto cleanup; } rv = fstat(fd, &statbuf); if (rv != 0) { log_err("Error fstat()ing file %s. Skipping...", file_full_path); goto cleanup; } if (opts.stdout_inode != 0 && opts.stdout_inode == statbuf.st_ino) { log_debug("Skipping %s because stdout is redirected to it", file_full_path); goto cleanup; } if ((statbuf.st_mode & S_IFMT) == 0) { log_err("%s is not a file. Mode %u. Skipping...", file_full_path, statbuf.st_mode); goto cleanup; } if (statbuf.st_mode & S_IFIFO) { log_debug("%s is a named pipe. stream searching", file_full_path); pipe = fdopen(fd, "r"); search_stream(pipe, file_full_path); fclose(pipe); } else { f_len = statbuf.st_size; if (f_len == 0) { log_debug("File %s is empty, skipping.", file_full_path); goto cleanup; } #ifdef _WIN32 { HANDLE hmmap = CreateFileMapping( (HANDLE)_get_osfhandle(fd), 0, PAGE_READONLY, 0, f_len, NULL); buf = (char*) MapViewOfFile(hmmap, FILE_SHARE_READ, 0, 0, f_len); if (hmmap != NULL) CloseHandle(hmmap); } if (buf == NULL) { FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(), 0, (void*) &buf, 0, NULL); log_err("File %s failed to load: %s.", file_full_path, buf); LocalFree((void*)buf); goto cleanup; } #else buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { log_err("File %s failed to load: %s.", file_full_path, strerror(errno)); goto cleanup; } #endif if (opts.search_zip_files) { ag_compression_type zip_type = is_zipped(buf, f_len); if (zip_type != AG_NO_COMPRESSION) { int _buf_len = (int)f_len; char *_buf = decompress(zip_type, buf, f_len, file_full_path, &_buf_len); if (_buf == NULL || _buf_len == 0) { log_err("Cannot decompress zipped file %s", 
file_full_path); goto cleanup; } search_buf(_buf, _buf_len, file_full_path); free(_buf); goto cleanup; } } search_buf(buf, (int)f_len, file_full_path); } cleanup:; if (fd != -1) { #ifdef _WIN32 UnmapViewOfFile(buf); #else munmap(buf, f_len); #endif close(fd); } }
int main(int argc, char **argv) { char **base_paths = NULL; char **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; double time_diff; pthread_t *workers = NULL; int workers_len; set_log_level(LOG_LEVEL_WARN); work_queue = NULL; work_queue_tail = NULL; memset(&stats, 0, sizeof(stats)); root_ignores = init_ignore(NULL, "", 0); out_fd = stdout; #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &base_paths, &paths); log_debug("PCRE Version: %s", pcre_version()); #ifdef _WIN32 { SYSTEM_INFO si; GetSystemInfo(&si); workers_len = si.dwNumberOfProcessors; } #else workers_len = (int)sysconf(_SC_NPROCESSORS_ONLN); #endif if (opts.literal) { workers_len--; } if (opts.workers) { workers_len = opts.workers; } if (workers_len < 1) { workers_len = 1; } log_debug("Using %i workers", workers_len); done_adding_files = FALSE; workers = ag_calloc(workers_len, sizeof(pthread_t)); if (pthread_cond_init(&files_ready, NULL)) { die("pthread_cond_init failed!"); } if (pthread_mutex_init(&print_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (pthread_mutex_init(&stats_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (pthread_mutex_init(&work_queue_mtx, NULL)) { die("pthread_mutex_init failed!"); } if (opts.casing == CASE_SMART) { opts.casing = is_lowercase(opts.query) ? 
CASE_INSENSITIVE : CASE_SENSITIVE; } if (opts.literal) { if (opts.casing == CASE_INSENSITIVE) { /* Search routine needs the query to be lowercase */ char *c = opts.query; for (; *c != '\0'; ++c) { *c = (char)tolower(*c); } } generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE); find_skip_lookup = NULL; generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE); if (opts.word_regexp) { init_wordchar_table(); opts.literal_starts_wordchar = is_wordchar(opts.query[0]); opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); } } else { if (opts.casing == CASE_INSENSITIVE) { pcre_opts |= PCRE_CASELESS; } if (opts.word_regexp) { char *word_regexp_query; ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query); free(opts.query); opts.query = word_regexp_query; opts.query_len = strlen(opts.query); } compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); } if (opts.search_stream) { search_stream(stdin, ""); } else { for (i = 0; i < workers_len; i++) { int rv = pthread_create(&(workers[i]), NULL, &search_file_worker, &i); if (rv != 0) { die("error in pthread_create(): %s", strerror(rv)); } } for (i = 0; paths[i] != NULL; i++) { log_debug("searching path %s for %s", paths[i], opts.query); symhash = NULL; ignores *ig = init_ignore(root_ignores, "", 0); search_dir(ig, base_paths[i], paths[i], 0); cleanup_ignore(ig); } pthread_mutex_lock(&work_queue_mtx); done_adding_files = TRUE; pthread_cond_broadcast(&files_ready); pthread_mutex_unlock(&work_queue_mtx); for (i = 0; i < workers_len; i++) { if (pthread_join(workers[i], NULL)) { die("pthread_join failed!"); } } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff /= 1000000; printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", 
stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } if (opts.pager) { pclose(out_fd); } cleanup_options(); pthread_cond_destroy(&files_ready); pthread_mutex_destroy(&work_queue_mtx); pthread_mutex_destroy(&stats_mtx); pthread_mutex_destroy(&print_mtx); cleanup_ignore(root_ignores); free(workers); for (i = 0; paths[i] != NULL; i++) { free(paths[i]); free(base_paths[i]); } free(base_paths); free(paths); if (find_skip_lookup) { free(find_skip_lookup); } return !opts.match_found; }
int wmain(int argc, wchar_t **argv) { wchar_t **base_paths = NULL; wchar_t **paths = NULL; int i; int pcre_opts = PCRE_MULTILINE; int study_opts = 0; double time_diff; worker_t *workers = NULL; int workers_len; int num_cores; #ifdef KJK_BUILD extern void setup_crash_handler(); /* in kjk_crash_handler.cpp */ setup_crash_handler(); #endif set_log_level(LOG_LEVEL_WARN); work_queue = NULL; work_queue_tail = NULL; memset(&stats, 0, sizeof(stats)); root_ignores = init_ignore(NULL, L"", 0); out_fd = stdout; #ifdef USE_PCRE_JIT int has_jit = 0; pcre_config(PCRE_CONFIG_JIT, &has_jit); if (has_jit) { study_opts |= PCRE_STUDY_JIT_COMPILE; } #endif gettimeofday(&(stats.time_start), NULL); parse_options(argc, argv, &base_paths, &paths); log_debug(L"PCRE Version: %s", pcre16_version()); setlocale(LC_ALL, "chs"); #ifdef _WIN32 { SYSTEM_INFO si; GetSystemInfo(&si); num_cores = si.dwNumberOfProcessors; } #else num_cores = (int)sysconf(_SC_NPROCESSORS_ONLN); #endif workers_len = num_cores; if (opts.literal) { workers_len--; } if (opts.workers) { workers_len = opts.workers; } if (workers_len < 1) { workers_len = 1; } log_debug(L"Using %i workers", workers_len); done_adding_files = FALSE; workers = (worker_t *) ag_calloc(workers_len, sizeof(worker_t)); if (pthread_cond_init(&files_ready, NULL)) { die(L"pthread_cond_init failed!"); } if (pthread_mutex_init(&print_mtx, NULL)) { die(L"pthread_mutex_init failed!"); } if (pthread_mutex_init(&stats_mtx, NULL)) { die(L"pthread_mutex_init failed!"); } if (pthread_mutex_init(&work_queue_mtx, NULL)) { die(L"pthread_mutex_init failed!"); } if (opts.casing == CASE_SMART) { opts.casing = is_lowercase(opts.query) ? 
CASE_INSENSITIVE : CASE_SENSITIVE; } if (opts.literal) { if (opts.casing == CASE_INSENSITIVE) { /* Search routine needs the query to be lowercase */ wchar_t *c = opts.query; for (; *c != '\0'; ++c) { *c = (wchar_t)towlower(*c); } } generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE); find_skip_lookup = NULL; generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE); if (opts.word_regexp) { init_wordchar_table(); opts.literal_starts_wordchar = is_wordchar(opts.query[0]); opts.literal_ends_wordchar = is_wordchar(opts.query[opts.query_len - 1]); } } else { if (opts.casing == CASE_INSENSITIVE) { pcre_opts |= PCRE_CASELESS; } if (opts.word_regexp) { wchar_t *word_regexp_query; ag_asprintf(&word_regexp_query, L"\\b%s\\b", opts.query); free(opts.query); opts.query = word_regexp_query; opts.query_len = wcslen(opts.query); } compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts); } if (opts.search_stream) { search_stream(stdin, L""); } else { for (i = 0; i < workers_len; i++) { workers[i].id = i; int rv = pthread_create(&(workers[i].thread), NULL, &search_file_worker, &(workers[i].id)); if (rv != 0) { die(L"error in pthread_create(): %s", strerror(rv)); } #if defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(USE_CPU_SET) if (opts.use_thread_affinity) { cpu_set_t cpu_set; CPU_ZERO(&cpu_set); CPU_SET(i % num_cores, &cpu_set); rv = pthread_setaffinity_np(workers[i].thread, sizeof(cpu_set), &cpu_set); if (rv != 0) { die("error in pthread_setaffinity_np(): %s", strerror(rv)); } log_debug("Thread %i set to CPU %i", i, i); } else { log_debug("Thread affinity disabled."); } #else log_debug(L"No CPU affinity support."); #endif } for (i = 0; paths[i] != NULL; i++) { log_debug(L"searching path %s for %s", paths[i], opts.query); symhash = NULL; ignores *ig = init_ignore(root_ignores, L"", 0); struct stat s; s.st_dev = 0; #ifndef _WIN32 /* The device is ignored if opts.one_dev is 
false, so it's fine * to leave it at the default 0 */ if (opts.one_dev && lstat(paths[i], &s) == -1) { log_err("Failed to get device information for path %s. Skipping...", paths[i]); } #endif search_dir(ig, base_paths[i], paths[i], 0, s.st_dev); cleanup_ignore(ig); } pthread_mutex_lock(&work_queue_mtx); done_adding_files = TRUE; pthread_cond_broadcast(&files_ready); pthread_mutex_unlock(&work_queue_mtx); for (i = 0; i < workers_len; i++) { if (pthread_join(workers[i].thread, NULL)) { die(L"pthread_join failed!"); } } } if (opts.stats) { gettimeofday(&(stats.time_end), NULL); time_diff = ((long)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) - ((long)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec); time_diff /= 1000000; wprintf(L"%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff); } if (opts.pager) { pclose(out_fd); } cleanup_options(); pthread_cond_destroy(&files_ready); pthread_mutex_destroy(&work_queue_mtx); pthread_mutex_destroy(&stats_mtx); pthread_mutex_destroy(&print_mtx); cleanup_ignore(root_ignores); free(workers); for (i = 0; paths[i] != NULL; i++) { free(paths[i]); free(base_paths[i]); } free(base_paths); free(paths); if (find_skip_lookup) { free(find_skip_lookup); } return !opts.match_found; }