Beispiel #1
0
void crawler::crawl(std::string &path) {
    std::string normalized_path = vmprobe::path::normalize(path);

    file_index already_seen_files;

    _crawl(normalized_path, already_seen_files);
}
/* Test case: crawl a directory that contains nothing.
 *
 * Builds the path "<global_base>/try-<id>" (id taken from _next_id()),
 * creates it with mode 0700, runs _crawl() on it and releases the path
 * string. A failure to create the directory is reported via g_error(). */
static void
_test_empty(void)
{
    gchar *workdir = g_strdup_printf("%s/try-%u", global_base, _next_id());
    const int rc = g_mkdir_with_parents(workdir, 0700);

    if (rc != 0)
        g_error("Try failure [%s] : (%d) %s", workdir, errno, strerror(errno));

    _crawl(workdir);
    g_free(workdir);
}
/* Test case: crawl a directory populated from a small fixed table.
 *
 * Builds the path "<global_base>/try-<id>" (id taken from _next_id()),
 * creates it with mode 0700, fills it through _populate() using the table
 * below, runs _crawl() on it and releases the path string. A failure to
 * create the directory is reported via g_error(). */
static void
_test_simple(void)
{
    /* NOTE(review): fields look like (parent directory, entry name, mode);
     * confirm against the definition of struct citizen_s. The all-NULL/0 row
     * is presumably the end-of-table marker expected by _populate(). */
    static struct citizen_s population[] = {
        {"/", "", 0400},
        {"/", "plop", 0600},
        {"/", "plip", 0600},
        {"/", "plup", 0600},
        {"/", "plap", 0600},
        {NULL, NULL, 0}
    };

    gchar *workdir = g_strdup_printf("%s/try-%u", global_base, _next_id());
    const int rc = g_mkdir_with_parents(workdir, 0700);

    if (rc != 0)
        g_error("Try failure [%s] : (%d) %s", workdir, errno, strerror(errno));

    _populate(workdir, population);
    _crawl(workdir);
    g_free(workdir);
}
Beispiel #4
0
void crawler::_crawl(std::string &path_std_string, file_index &already_seen_files) {
    const char *path = path_std_string.c_str();
    struct ::stat sb;

    int res = follow_symlinks ? stat(path, &sb) : lstat(path, &sb);

    if (res) {
        warning("unable to stat %s (%s)", path, strerror(errno));
        return;
    }

    if (S_ISLNK(sb.st_mode)) {
        warning("not following symbolic link %s", path);
        return;
    }

    if (skip_duplicate_hardlinks && sb.st_nlink > 1) {
        file_index::iterator dev = already_seen_files.find(sb.st_dev);

        // Haven't seen this device before, initialize it
        if (dev == already_seen_files.end()) {
            already_seen_files.emplace(sb.st_dev, std::unordered_set<ino_t> {sb.st_ino});
        }
        // Already seen this inode before
        else if (!dev->second.insert(sb.st_ino).second) {
            warning("skipping duplicate hardlink %s", path);
            return;
        }
    }

    if (S_ISDIR(sb.st_mode)) {
        if (curr_crawl_depth == max_crawl_depth) {
            warning("maximum directory crawl depth reached: %s", path);
            return;
        }

        num_dirs++;

        retry_opendir:

        DIR *dirp = opendir(path);

        if (dirp == NULL) {
            if (errno == ENFILE || errno == EMFILE) {
                increment_nofile_rlimit();
                goto retry_opendir;
            }

            warning("unable to opendir %s (%s), skipping", path, strerror(errno));
            return;
        }

        struct dirent *de;
        std::vector<std::string> dir_entries;

        while((de = readdir(dirp)) != NULL) {
            if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue;

            dir_entries.emplace_back(de->d_name);
        }

        if (closedir(dirp)) fatal("unable to closedir %s (%s)", path, strerror(errno));

        std::sort(dir_entries.begin(), dir_entries.end());

        for (auto &s : dir_entries) { 
            std::string npath = path_std_string + std::string("/") + s;

            curr_crawl_depth++;
            _crawl(npath, already_seen_files);
            curr_crawl_depth--;
        }
    } else if (S_ISREG(sb.st_mode)) {
        num_files++;
        // FIXME: catch exceptions thrown by the callback
        file_handler(path_std_string, sb);
    } else {
        warning("skipping non-regular file: %s", path);
    }
}