// if there are any entries that are obsoleted by a recursive insert, // walk over them now and mark them as ignored. void PendingCollectionBase::maybePruneObsoletedChildren( w_string path, int flags) { if ((flags & (W_PENDING_RECURSIVE | W_PENDING_CRAWL_ONLY)) == W_PENDING_RECURSIVE) { iterContext ctx{path, *this}; uint32_t pruned = 0; // Since deletion invalidates the iterator, we need to repeatedly // call this to prune out the nodes. It will return 0 once no // matching prefixes are found and deleted. while (tree_.iterPrefix((const uint8_t*)path.data(), path.size(), ctx)) { // OK; try again ++pruned; } if (pruned) { w_log( W_LOG_DBG, "maybePruneObsoletedChildren: pruned %u nodes under (%d) %.*s\n", pruned, int(path.size()), int(path.size()), path.data()); } } }
// Build the state for a root located at `root_path`.
//
// The initializer list probes the filesystem for case sensitivity, loads the
// root's configuration file and then reads the tunables ("settle",
// "gc_interval_seconds", "gc_age_seconds", "idle_reap_age_seconds") from the
// resulting config, falling back to the DEFAULT_* constants.
// Initializers that read `config` rely on `config_file`/`config` having been
// initialized first, so the order below must be kept.
watchman_root::watchman_root(const w_string& root_path)
    : root_path(root_path),
      case_sensitive(is_case_sensitive_filesystem(root_path.c_str())),
      cookies(root_path),
      config_file(load_root_config(root_path.c_str())),
      config(config_file),
      trigger_settle(
          static_cast<int>(config.getInt("settle", DEFAULT_SETTLE_PERIOD))),
      gc_interval(static_cast<int>(
          config.getInt("gc_interval_seconds", DEFAULT_GC_INTERVAL))),
      gc_age(static_cast<int>(
          config.getInt("gc_age_seconds", DEFAULT_GC_AGE))),
      idle_reap_age(static_cast<int>(
          config.getInt("idle_reap_age_seconds", DEFAULT_REAP_AGE))),
      unilateralResponses(std::make_shared<watchman::Publisher>()) {
  // Account for this instance in the live root counter.
  ++live_roots;

  // Apply the ignore settings before running the remaining initialization.
  applyIgnoreConfiguration();
  applyIgnoreVCSConfiguration();

  init();
}
// Given a target of the form "absolute_path/filename", return // realpath(absolute_path) + filename, where realpath(absolute_path) resolves // all the symlinks in absolute_path. static w_string get_normalized_target(const w_string& target) { int err; w_assert( w_string_path_is_absolute(target), "get_normalized_target: path %s is not absolute\n", target.c_str()); auto dir_name = target.dirName(); auto dir_name_real = realPath(dir_name.c_str()); err = errno; if (dir_name_real) { auto file_name = target.baseName(); return w_string::pathCat({dir_name_real, file_name}); } errno = err; return nullptr; }
// Check the tree to see if there is a path that is earlier/higher in the // filesystem than the input path; if there is, and it is recursive, // return true to indicate that there is no need to track this new path // due to the already scheduled higher level path. bool PendingCollectionBase::isObsoletedByContainingDir(const w_string& path) { auto leaf = tree_.longestMatch((const uint8_t*)path.data(), path.size()); if (!leaf) { return false; } auto p = leaf->value; if ((p->flags & W_PENDING_RECURSIVE) && is_path_prefix( path.data(), path.size(), (const char*)leaf->key.data(), leaf->key.size())) { if (watchman::CookieSync::isPossiblyACookie(path)) { return false; } // Yes: the pre-existing entry higher up in the tree obsoletes this // one that we would add now. w_log( W_LOG_DBG, "is_obsoleted: SKIP %.*s is obsoleted by %.*s\n", int(path.size()), path.data(), int(p->path.size()), p->path.data()); return true; } return false; }
// This is the iterator callback we use to prune out obsoleted leaves. // We need to compare the prefix to make sure that we don't delete // a sibling node by mistake (see commentary on the is_path_prefix // function for more on that). int PendingCollectionBase::iterContext::operator()( const w_string& key, std::shared_ptr<watchman_pending_fs>& p) { if (!p) { // It was removed; update the tree to reflect this coll.tree_.erase(key); // Stop iteration: we deleted something and invalidated the iterators. return 1; } if ((p->flags & W_PENDING_CRAWL_ONLY) == 0 && key.size() > root.size() && is_path_prefix( (const char*)key.data(), key.size(), root.data(), root.size()) && !watchman::CookieSync::isPossiblyACookie(p->path)) { w_log( W_LOG_DBG, "delete_kids: removing (%d) %.*s from pending because it is " "obsoleted by (%d) %.*s\n", int(p->path.size()), int(p->path.size()), p->path.data(), int(root.size()), int(root.size()), root.data()); // Unlink the child from the pending index. coll.unlinkItem(p); // Remove it from the art tree. coll.tree_.erase(key); // Stop iteration because we just invalidated the iterator state // by modifying the tree mid-iteration. return 1; } return 0; }
/* add a pending entry. Will consolidate an existing entry with the * same name. Returns false if an allocation fails. * The caller must own the collection lock. */ bool PendingCollectionBase::add( const w_string& path, struct timeval now, int flags) { char flags_label[128]; auto existing = tree_.search(path); if (existing) { /* Entry already exists: consolidate */ consolidateItem(existing->get(), flags); /* all done */ return true; } if (isObsoletedByContainingDir(path)) { return true; } // Try to allocate the new node before we prune any children. auto p = std::make_shared<watchman_pending_fs>(path, now, flags); maybePruneObsoletedChildren(path, flags); w_expand_flags(kflags, flags, flags_label, sizeof(flags_label)); w_log( W_LOG_DBG, "add_pending: %.*s %s\n", int(path.size()), path.data(), flags_label); tree_.insert(path, p); linkHead(std::move(p)); return true; }
// Requires target to be an absolute path static void watch_symlink_target(const w_string& target, json_t* root_files) { w_assert( w_string_path_is_absolute(target), "watch_symlink_target: path %s is not absolute\n", target.c_str()); w_string normalized_target; try { normalized_target = get_normalized_target(target); } catch (const std::system_error& exc) { watchman::log( watchman::ERR, "watch_symlink_target: unable to get normalized version of target `", target, "`; realpath ", exc.what(), "\n"); return; } w_string_piece relpath; w_string_piece watched_root; bool enclosing = findEnclosingRoot(normalized_target, watched_root, relpath); if (!enclosing) { w_string_piece resolved(normalized_target); if (!find_project_root(root_files, resolved, relpath)) { watchman::log( watchman::ERR, "watch_symlink_target: No watchable root for ", resolved, "\n"); } else { char* errmsg = nullptr; SCOPE_EXIT{ free(errmsg); }; auto root = w_root_resolve(resolved.asWString().c_str(), true, &errmsg); if (!root) { watchman::log( watchman::ERR, "watch_symlink_target: unable to watch ", resolved, ": ", errmsg, "\n"); } } }
void InMemoryView::crawler( const std::shared_ptr<w_root_t>& root, SyncView::LockedPtr& view, PendingCollection::LockedPtr& coll, const w_string& dir_name, struct timeval now, bool recursive) { struct watchman_file *file; const watchman_dir_ent* dirent; char path[WATCHMAN_NAME_MAX]; bool stat_all = false; if (watcher_->flags & WATCHER_HAS_PER_FILE_NOTIFICATIONS) { stat_all = watcher_->flags & WATCHER_COALESCED_RENAME; } else { // If the watcher doesn't give us per-file notifications for // watched dirs, then we'll end up explicitly tracking them // and will get updates for the files explicitly. // We don't need to look at the files again when we crawl stat_all = false; } auto dir = resolveDir(view, dir_name, true); // Detect root directory replacement. // The inode number check is handled more generally by the sister code // in stat.cpp. We need to special case it for the root because we never // generate a watchman_file node for the root and thus never call // InMemoryView::statPath (we'll fault if we do!). // Ideally the kernel would have given us a signal when we've been replaced // but some filesystems (eg: BTRFS) do not emit appropriate inotify events // for things like subvolume deletes. We've seen situations where the // root has been replaced and we got no notifications at all and this has // left the cookie sync mechanism broken forever. if (dir_name == root->root_path) { try { auto st = getFileInformation(dir_name.c_str(), root->case_sensitive); if (st.ino != view->rootInode) { // If it still exists and the inode doesn't match, then we need // to force recrawl to make sure we're in sync. // We're lazily initializing the rootInode to 0 here, so we don't // need to do this the first time through (we're already crawling // everything in that case). 
if (view->rootInode != 0) { root->scheduleRecrawl( "root was replaced and we didn't get notified by the kernel"); return; } recursive = true; view->rootInode = st.ino; } } catch (const std::system_error& err) { handle_open_errno(root, dir, now, "getFileInformation", err.code()); markDirDeleted(view, dir, now, true); return; } } memcpy(path, dir_name.data(), dir_name.size()); path[dir_name.size()] = 0; w_log(W_LOG_DBG, "opendir(%s) recursive=%s\n", path, recursive ? "true" : "false"); /* Start watching and open the dir for crawling. * Whether we open the dir prior to watching or after is watcher specific, * so the operations are rolled together in our abstraction */ std::unique_ptr<watchman_dir_handle> osdir; try { osdir = watcher_->startWatchDir(root, dir, path); } catch (const std::system_error& err) { handle_open_errno(root, dir, now, "opendir", err.code()); markDirDeleted(view, dir, now, true); return; } if (dir->files.empty()) { // Pre-size our hash(es) if we can, so that we can avoid collisions // and re-hashing during initial crawl uint32_t num_dirs = 0; #ifndef _WIN32 struct stat st; int dfd = osdir->getFd(); if (dfd != -1 && fstat(dfd, &st) == 0) { num_dirs = (uint32_t)st.st_nlink; } #endif // st.st_nlink is usually number of dirs + 2 (., ..). // If it is less than 2 then it doesn't follow that convention. // We just pass it through for the dir size hint and the hash // table implementation will round that up to the next power of 2 apply_dir_size_hint( dir, num_dirs, uint32_t(root->config.getInt("hint_num_files_per_dir", 64))); } /* flag for delete detection */ for (auto& it : dir->files) { auto file = it.second.get(); if (file->exists) { file->maybe_deleted = true; } } try { while ((dirent = osdir->readDir()) != nullptr) { // Don't follow parent/self links if (dirent->d_name[0] == '.' 
&& (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))) { continue; } // Queue it up for analysis if the file is newly existing w_string name(dirent->d_name, W_STRING_BYTE); file = dir->getChildFile(name); if (file) { file->maybe_deleted = false; } if (!file || !file->exists || stat_all || recursive) { auto full_path = dir->getFullPathToChild(name); w_log( W_LOG_DBG, "in crawler calling process_path on %s\n", full_path.c_str()); processPath( root, view, coll, full_path, now, ((recursive || !file || !file->exists) ? W_PENDING_RECURSIVE : 0), dirent); } } } catch (const std::system_error& exc) { log(ERR, "Error while reading dir ", path, ": ", exc.what(), ", re-adding to pending list to re-assess\n"); coll->add(path, now, 0); } osdir.reset(); // Anything still in maybe_deleted is actually deleted. // Arrange to re-process it shortly for (auto& it : dir->files) { auto file = it.second.get(); if (file->exists && (file->maybe_deleted || (file->stat.isDir() && recursive))) { coll->add( dir, file->getName().data(), now, recursive ? W_PENDING_RECURSIVE : 0); } } }
static std::unique_ptr<QueryExpr> parse(w_query*, const json_ref& term, CaseSensitivity caseSensitive) { const char *pattern = nullptr, *scope = "basename"; const char *which = caseSensitive == CaseSensitivity::CaseInSensitive ? "iname" : "name"; std::unordered_set<w_string> set; if (!term.isArray()) { throw QueryParseError("Expected array for '", which, "' term"); } if (json_array_size(term) > 3) { throw QueryParseError( "Invalid number of arguments for '", which, "' term"); } if (json_array_size(term) == 3) { const auto& jscope = term.at(2); if (!jscope.isString()) { throw QueryParseError("Argument 3 to '", which, "' must be a string"); } scope = json_string_value(jscope); if (strcmp(scope, "basename") && strcmp(scope, "wholename")) { throw QueryParseError( "Invalid scope '", scope, "' for ", which, " expression"); } } const auto& name = term.at(1); if (name.isArray()) { uint32_t i; for (i = 0; i < json_array_size(name); i++) { if (!json_array_get(name, i).isString()) { throw QueryParseError( "Argument 2 to '", which, "' must be either a string or an array of string"); } } set.reserve(json_array_size(name)); for (i = 0; i < json_array_size(name); i++) { w_string element; const auto& jele = name.at(i); auto ele = json_to_w_string(jele); if (caseSensitive == CaseSensitivity::CaseInSensitive) { element = ele.piece().asLowerCase(ele.type()).normalizeSeparators(); } else { element = ele.normalizeSeparators(); } set.insert(element); } } else if (name.isString()) { pattern = json_string_value(name); } else { throw QueryParseError( "Argument 2 to '", which, "' must be either a string or an array of string"); } auto data = new NameExpr(std::move(set), caseSensitive, !strcmp(scope, "wholename")); if (pattern) { data->name = json_to_w_string(name).normalizeSeparators(); } return std::unique_ptr<QueryExpr>(data); }