Esempio n. 1
0
void add_ignore_pattern(ignores *ig, const char* pattern) {
    int i;

    /* Strip off the leading ./ so that matches are more likely. */
    if (strncmp(pattern, "./", 2) == 0) {
        pattern += 2;
    }

    if (is_fnmatch(pattern)) {
        ig->regexes_len++;
        ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char*));
        ig->regexes[ig->regexes_len - 1] = ag_strdup(pattern);
        log_debug("added regex ignore pattern %s", pattern);
    } else {
        /* a balanced binary tree is best for performance, but I'm lazy */
        ig->names_len++;
        ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char*));
        for (i = ig->names_len - 1; i > 0; i--) {
            if (strcmp(pattern, ig->names[i-1]) > 0) {
                break;
            }
            ig->names[i] = ig->names[i-1];
        }
        ig->names[i] = ag_strdup(pattern);
        log_debug("added literal ignore pattern %s", pattern);
    }
}
Esempio n. 2
0
char *make_lang_regex(char *ext_array, size_t num_exts) {
    int regex_capacity = 100;
    char *regex = ag_malloc(regex_capacity);
    int regex_length = 3;
    int subsequent = 0;
    char *extension;
    size_t i;

    strcpy(regex, "\\.(");

    for (i = 0; i < num_exts; ++i) {
        extension = ext_array + i * SINGLE_EXT_LEN;
        int extension_length = strlen(extension);
        while (regex_length + extension_length + 3 + subsequent > regex_capacity) {
            regex_capacity *= 2;
            regex = ag_realloc(regex, regex_capacity);
        }
        if (subsequent) {
            regex[regex_length++] = '|';
        } else {
            subsequent = 1;
        }
        strcpy(regex + regex_length, extension);
        regex_length += extension_length;
    }

    regex[regex_length++] = ')';
    regex[regex_length++] = '$';
    regex[regex_length++] = 0;
    return regex;
}
Esempio n. 3
0
char *make_lang_regex(const char **extensions) {
    int regex_capacity = 100;
    char *regex = ag_malloc(regex_capacity);
    int regex_length = 3;
    int subsequent = 0;
    const char **extension;

    strcpy(regex, "\\.(");

    for (extension = extensions; *extension; ++extension) {
        int extension_length = strlen(*extension);
        while (regex_length + extension_length + 3 + subsequent > regex_capacity) {
            regex_capacity *= 2;
            regex = ag_realloc(regex, regex_capacity);
        }
        if (subsequent) {
            regex[regex_length++] = '|';
        } else {
            subsequent = 1;
        }
        strcpy(regex + regex_length, *extension);
        regex_length += extension_length;
    }

    regex[regex_length++] = ')';
    regex[regex_length++] = '$';
    regex[regex_length++] = 0;
    return regex;
}
Esempio n. 4
0
void realloc_matches(match_t **matches, size_t *matches_size, size_t matches_len) {
    if (matches_len < *matches_size) {
        return;
    }
    /* TODO: benchmark initial size of matches. 100 may be too small/big */
    *matches_size = *matches ? *matches_size * 2 : 100;
    *matches = (match_t*) ag_realloc(*matches, *matches_size * sizeof(match_t));
}
Esempio n. 5
0
void add_ignore_pattern(ignores *ig, const char* pattern) {
    int i;
    int pattern_len;

    if ('/' == pattern[0]) {
        log_debug("Pattern begins with '/', skipping.");
        return;
    }

    /* Strip off the leading ./ so that matches are more likely. */
    if (strncmp(pattern, "./", 2) == 0) {
        pattern += 2;
    }

    /* Kill trailing whitespace */
    for (pattern_len = strlen(pattern); pattern_len > 0; pattern_len--) {
        if (!isspace(pattern[pattern_len-1])) {
            break;
        }
    }

    if (pattern_len == 0) {
        log_debug("Pattern is empty. Not adding any ignores.");
        return;
    }

    /* TODO: de-dupe these patterns */
    if (is_fnmatch(pattern)) {
        ig->regexes_len++;
        ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char*));
        ig->regexes[ig->regexes_len - 1] = ag_strndup(pattern, pattern_len);
        log_debug("added regex ignore pattern %s", pattern);
    } else {
        /* a balanced binary tree is best for performance, but I'm lazy */
        ig->names_len++;
        ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char*));
        for (i = ig->names_len - 1; i > 0; i--) {
            if (strcmp(pattern, ig->names[i-1]) > 0) {
                break;
            }
            ig->names[i] = ig->names[i-1];
        }
        ig->names[i] = ag_strndup(pattern, pattern_len);
        log_debug("added literal ignore pattern %s", pattern);
    }
}
Esempio n. 6
0
void add_ignore_pattern(ignores *ig, const char *pattern) {
    int i;
    int pattern_len;

    /* Strip off the leading dot so that matches are more likely. */
    if (strncmp(pattern, "./", 2) == 0) {
        pattern++;
    }

    /* Kill trailing whitespace */
    for (pattern_len = strlen(pattern); pattern_len > 0; pattern_len--) {
        if (!isspace(pattern[pattern_len - 1])) {
            break;
        }
    }

    if (pattern_len == 0) {
        log_debug("Pattern is empty. Not adding any ignores.");
        return;
    }

    char ***patterns_p;
    size_t *patterns_len;
    if (is_fnmatch(pattern)) {
        if (pattern[0] == '/') {
            patterns_p = &(ig->slash_regexes);
            patterns_len = &(ig->slash_regexes_len);
            pattern++;
            pattern_len--;
        } else {
            patterns_p = &(ig->regexes);
            patterns_len = &(ig->regexes_len);
        }
    } else {
        if (pattern[0] == '/') {
            patterns_p = &(ig->slash_names);
            patterns_len = &(ig->slash_names_len);
            pattern++;
            pattern_len--;
        } else {
            patterns_p = &(ig->names);
            patterns_len = &(ig->names_len);
        }
    }

    ++*patterns_len;

    char **patterns;

    /* a balanced binary tree is best for performance, but I'm lazy */
    *patterns_p = patterns = ag_realloc(*patterns_p, (*patterns_len) * sizeof(char *));
    /* TODO: de-dupe these patterns */
    for (i = *patterns_len - 1; i > 0; i--) {
        if (strcmp(pattern, patterns[i - 1]) > 0) {
            break;
        }
        patterns[i] = patterns[i - 1];
    }
    patterns[i] = ag_strndup(pattern, pattern_len);
    log_debug("added ignore pattern %s to %s", pattern,
            ig == root_ignores ? "root ignores" : ig->abs_path);
}
Esempio n. 7
0
void load_svn_ignore_patterns(ignores *ig, const char *path) {
    FILE *fp = NULL;
    char *dir_prop_base;
    ag_asprintf(&dir_prop_base, "%s/%s", path, SVN_DIR_PROP_BASE);

    fp = fopen(dir_prop_base, "r");
    if (fp == NULL) {
        log_debug("Skipping svn ignore file %s", dir_prop_base);
        free(dir_prop_base);
        return;
    }

    char *entry = NULL;
    size_t entry_len = 0;
    char *key = ag_malloc(32); /* Sane start for max key length. */
    size_t key_len = 0;
    size_t bytes_read = 0;
    char *entry_line;
    size_t line_len;
    int matches;

    while (fscanf(fp, "K %zu\n", &key_len) == 1) {
        key = ag_realloc(key, key_len + 1);
        bytes_read = fread(key, 1, key_len, fp);
        key[key_len] = '\0';
        matches = fscanf(fp, "\nV %zu\n", &entry_len);
        if (matches != 1) {
            log_debug("Unable to parse svnignore file %s: fscanf() got %i matches, expected 1.", dir_prop_base, matches);
            goto cleanup;
        }

        if (strncmp(SVN_PROP_IGNORE, key, bytes_read) != 0) {
            log_debug("key is %s, not %s. skipping %u bytes", key, SVN_PROP_IGNORE, entry_len);
            /* Not the key we care about. fseek and repeat */
            fseek(fp, entry_len + 1, SEEK_CUR); /* +1 to account for newline. yes I know this is hacky */
            continue;
        }
        /* Aww yeah. Time to ignore stuff */
        entry = ag_malloc(entry_len + 1);
        bytes_read = fread(entry, 1, entry_len, fp);
        entry[bytes_read] = '\0';
        log_debug("entry: %s", entry);
        break;
    }
    if (entry == NULL) {
        goto cleanup;
    }
    char *patterns = entry;
    size_t patterns_len = strlen(patterns);
    while (*patterns != '\0' && patterns < (entry + bytes_read)) {
        for (line_len = 0; line_len < patterns_len; line_len++) {
            if (patterns[line_len] == '\n') {
                break;
            }
        }
        if (line_len > 0) {
            entry_line = ag_strndup(patterns, line_len);
            add_ignore_pattern(ig, entry_line);
            free(entry_line);
        }
        patterns += line_len + 1;
        patterns_len -= line_len + 1;
    }
    free(entry);
cleanup:
    free(dir_prop_base);
    free(key);
    fclose(fp);
}
Esempio n. 8
0
void search_buf(const char *buf, const int buf_len,
                const char *dir_full_path) {
    int binary = -1;  /* 1 = yes, 0 = no, -1 = don't know */
    int buf_offset = 0;

    if (opts.search_stream) {
        binary = 0;
    } else if (!opts.search_binary_files) {
        binary = is_binary((void*) buf, buf_len);
        if (binary) {
            log_debug("File %s is binary. Skipping...", dir_full_path);
            return;
        }
    }

    int matches_len = 0;
    match *matches;
    size_t matches_size;
    size_t matches_spare;

    if (opts.invert_match) {
        /* If we are going to invert the set of matches at the end, we will need
         * one extra match struct, even if there are no matches at all. So make
         * sure we have a nonempty array; and make sure we always have spare
         * capacity for one extra.
         */
        matches_size = 100;
        matches = ag_malloc(matches_size * sizeof(match));
        matches_spare = 1;
    } else {
        matches_size = 0;
        matches = NULL;
        matches_spare = 0;
    }

    if (opts.literal) {
        const char *match_ptr = buf;
        strncmp_fp ag_strnstr_fp = get_strstr(opts);

        while (buf_offset < buf_len) {
            match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup);
            if (match_ptr == NULL) {
                break;
            }

            if (opts.word_regexp) {
                const char *start = match_ptr;
                const char *end = match_ptr + opts.query_len;

                /* Check whether both start and end of the match lie on a word
                 * boundary
                 */
                if ((start == buf ||
                        is_wordchar(*(start - 1)) != opts.literal_starts_wordchar)
                        &&
                        (end == buf + buf_len ||
                         is_wordchar(*end) != opts.literal_ends_wordchar))
                {
                    /* It's a match */
                } else {
                    /* It's not a match */
                    match_ptr += opts.query_len;
                    buf_offset = end - buf;
                    continue;
                }
            }

            if ((size_t)matches_len + matches_spare >= matches_size) {
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = match_ptr - buf;
            matches[matches_len].end = matches[matches_len].start + opts.query_len;
            buf_offset = matches[matches_len].end;
            log_debug("Match found. File %s, offset %i bytes.", dir_full_path, matches[matches_len].start);
            matches_len++;
            match_ptr += opts.query_len;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    } else {
        int rc;
        int offset_vector[3];
        while (buf_offset < buf_len &&
                (rc = pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
            log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
            buf_offset = offset_vector[1];

            /* TODO: copy-pasted from above. FIXME */
            if ((size_t)matches_len + matches_spare >= matches_size) {
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = offset_vector[0];
            matches[matches_len].end = offset_vector[1];
            matches_len++;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    }

    if (opts.invert_match) {
        matches_len = invert_matches(matches, matches_len, buf_len);
    }

    if (opts.stats) {
        pthread_mutex_lock(&stats_mtx);
        stats.total_bytes += buf_len;
        stats.total_files++;
        stats.total_matches += matches_len;
        pthread_mutex_unlock(&stats_mtx);
    }

    if (matches_len > 0) {
        if (binary == -1 && !opts.print_filename_only) {
            binary = is_binary((void*) buf, buf_len);
        }
        pthread_mutex_lock(&print_mtx);
        if (opts.print_filename_only) {
            print_path(dir_full_path, '\n');
        } else if (binary) {
            print_binary_file_matches(dir_full_path);
        } else {
            print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
        }
        pthread_mutex_unlock(&print_mtx);
    } else {
        log_debug("No match in %s", dir_full_path);
    }

    if (matches_size > 0) {
        free(matches);
    }
}
Esempio n. 9
0
void search_buf(const char *buf, const size_t buf_len,
                const char *dir_full_path) {
    int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
    size_t buf_offset = 0;

    if (opts.search_stream) {
        binary = 0;
    } else if (!opts.search_binary_files) {
        binary = is_binary((const void *)buf, buf_len);
        if (binary) {
            log_debug("File %s is binary. Skipping...", dir_full_path);
            return;
        }
    }

    int matches_len = 0;
    match *matches;
    size_t matches_size;
    size_t matches_spare;

    if (opts.invert_match) {
        /* If we are going to invert the set of matches at the end, we will need
         * one extra match struct, even if there are no matches at all. So make
         * sure we have a nonempty array; and make sure we always have spare
         * capacity for one extra.
         */
        matches_size = 100;
        matches = ag_malloc(matches_size * sizeof(match));
        matches_spare = 1;
    } else {
        matches_size = 0;
        matches = NULL;
        matches_spare = 0;
    }

    if (opts.query_len == 1 && opts.query[0] == '.') {
        matches_size = 1;
        matches = ag_malloc(matches_size * sizeof(match));
        matches[0].start = 0;
        matches[0].end = buf_len;
        matches_len = 1;
    } else if (opts.literal) {
        const char *match_ptr = buf;
        strncmp_fp ag_strnstr_fp = get_strstr(opts.casing);

        while (buf_offset < buf_len) {
            match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, skip_lookup);
            if (match_ptr == NULL) {
                break;
            }

            if (opts.word_regexp) {
                const char *start = match_ptr;
                const char *end = match_ptr + opts.query_len;

                /* Check whether both start and end of the match lie on a word
                 * boundary
                 */
                if ((start == buf ||
                     is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) &&
                    (end == buf + buf_len ||
                     is_wordchar(*end) != opts.literal_ends_wordchar)) {
                    /* It's a match */
                } else {
                    /* It's not a match */
                    match_ptr += opts.query_len;
                    buf_offset = end - buf;
                    continue;
                }
            }

            if ((size_t)matches_len + matches_spare >= matches_size) {
                /* TODO: benchmark initial size of matches. 100 may be too small/big */
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = match_ptr - buf;
            matches[matches_len].end = matches[matches_len].start + opts.query_len;
            buf_offset = matches[matches_len].end;
            log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start);
            matches_len++;
            match_ptr += opts.query_len;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    } else {
        int offset_vector[3];
        while (buf_offset < buf_len &&
               (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
            log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
            buf_offset = offset_vector[1];

            /* TODO: copy-pasted from above. FIXME */
            if ((size_t)matches_len + matches_spare >= matches_size) {
                matches_size = matches ? matches_size * 2 : 100;
                log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
                matches = ag_realloc(matches, matches_size * sizeof(match));
            }

            matches[matches_len].start = offset_vector[0];
            matches[matches_len].end = offset_vector[1];
            matches_len++;

            if (matches_len >= opts.max_matches_per_file) {
                log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
                break;
            }
        }
    }

    if (opts.invert_match) {
        matches_len = invert_matches(buf, buf_len, matches, matches_len);
    }

    if (opts.stats) {
        pthread_mutex_lock(&stats_mtx);
        stats.total_bytes += buf_len;
        stats.total_files++;
        stats.total_matches += matches_len;
        pthread_mutex_unlock(&stats_mtx);
    }

    if (matches_len > 0) {
        if (binary == -1 && !opts.print_filename_only) {
            binary = is_binary((const void *)buf, buf_len);
        }
        pthread_mutex_lock(&print_mtx);
        if (opts.print_filename_only) {
            /* If the --files-without-matches or -L option in passed we should
             * not print a matching line. This option currently sets
             * opts.print_filename_only and opts.invert_match. Unfortunately
             * setting the latter has the side effect of making matches.len = 1
             * on a file-without-matches which is not desired behaviour. See
             * GitHub issue 206 for the consequences if this behaviour is not
             * checked. */
            if (!opts.invert_match || matches_len < 2) {
                print_path(dir_full_path, '\n');
            }
        } else if (binary) {
            print_binary_file_matches(dir_full_path);
        } else {
            print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
        }
        pthread_mutex_unlock(&print_mtx);
    } else {
        log_debug("No match in %s", dir_full_path);
    }

    if (matches_size > 0) {
        free(matches);
    }
}
Esempio n. 10
0
void search_buf(char *buf, size_t buf_len,
	const char *dir_full_path, char *tmp_file_path) {
	int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
	size_t buf_offset = 0;

	//if (opts.search_stream) {
	//	binary = 0;
	//}
	//else if (!opts.search_binary_files) {
	//	binary = is_binary(buf, buf_len);

	//	if (binary) {
	//		log_debug("File %s is binary. Skipping...", dir_full_path);
	//		if (!convert_to_text(&buf, &buf_len, dir_full_path, tmp_file_path))
	//			return;
	//	}
	//}

	binary = is_binary(buf, buf_len);

	if (binary) {
		log_debug("File %s is binary. Convert to text...", dir_full_path);
		if (!convert_to_text(&buf, &buf_len, dir_full_path, tmp_file_path))
			return;
	}
	size_t matches_len = 0;
	match_t *matches;
	size_t matches_size;
	size_t matches_spare;

	//if (opts.invert_match) {
	//	/* If we are going to invert the set of matches at the end, we will need
	//	* one extra match struct, even if there are no matches at all. So make
	//	* sure we have a nonempty array; and make sure we always have spare
	//	* capacity for one extra.
	//	*/
	//	matches_size = 100;
	//	matches = (match_t *)ag_malloc(matches_size * sizeof(match_t));
	//	matches_spare = 1;
	//}
	//else {
	//	matches_size = 0;
	//	matches = NULL;
	//	matches_spare = 0;
	//}
	matches_size = 0;
	matches = NULL;
	matches_spare = 0;

	if (/*!opts.literal && */opts.query_len == 1 && opts.query[0] == '.') {
		matches_size = 1;
		matches = (match_t *)ag_malloc(matches_size * sizeof(match_t));
		matches[0].start = 0;
		matches[0].end = buf_len;
		matches_len = 1;
	}
	else {
		const char *match_ptr = buf;
		strncmp_fp ag_strnstr_fp = get_strstr(opts.casing);

		while (buf_offset < buf_len) {
			match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, alpha_skip_lookup, find_skip_lookup);
			if (match_ptr == NULL) {
				break;
			}

			if (opts.word_regexp) {
				const char *start = match_ptr;
				const char *end = match_ptr + opts.query_len;

				/* Check whether both start and end of the match lie on a word
				* boundary
				*/
				if ((start == buf ||
					is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) &&
					(end == buf + buf_len ||
					is_wordchar(*end) != opts.literal_ends_wordchar)) {
					/* It's a match */
				}
				else {
					/* It's not a match */
					match_ptr += opts.query_len;
					buf_offset = end - buf;
					continue;
				}
			}

			if (matches_len + matches_spare >= matches_size) {
				/* TODO: benchmark initial size of matches. 100 may be too small/big */
				matches_size = matches ? matches_size * 2 : 100;
				log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
				matches = (match_t *)ag_realloc(matches, matches_size * sizeof(match_t));
			}

			matches[matches_len].start = match_ptr - buf;
			matches[matches_len].end = matches[matches_len].start + opts.query_len;
			buf_offset = matches[matches_len].end;
			log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start);
			matches_len++;
			match_ptr += opts.query_len;

			if (opts.max_matches_per_file > 0 && matches_len >= opts.max_matches_per_file) {
				log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
				break;
			}
		}
	}
	else {