// if there are any entries that are obsoleted by a recursive insert, // walk over them now and mark them as ignored. void PendingCollectionBase::maybePruneObsoletedChildren( w_string path, int flags) { if ((flags & (W_PENDING_RECURSIVE | W_PENDING_CRAWL_ONLY)) == W_PENDING_RECURSIVE) { iterContext ctx{path, *this}; uint32_t pruned = 0; // Since deletion invalidates the iterator, we need to repeatedly // call this to prune out the nodes. It will return 0 once no // matching prefixes are found and deleted. while (tree_.iterPrefix((const uint8_t*)path.data(), path.size(), ctx)) { // OK; try again ++pruned; } if (pruned) { w_log( W_LOG_DBG, "maybePruneObsoletedChildren: pruned %u nodes under (%d) %.*s\n", pruned, int(path.size()), int(path.size()), path.data()); } } }
// Check the tree to see if there is a path that is earlier/higher in the // filesystem than the input path; if there is, and it is recursive, // return true to indicate that there is no need to track this new path // due to the already scheduled higher level path. bool PendingCollectionBase::isObsoletedByContainingDir(const w_string& path) { auto leaf = tree_.longestMatch((const uint8_t*)path.data(), path.size()); if (!leaf) { return false; } auto p = leaf->value; if ((p->flags & W_PENDING_RECURSIVE) && is_path_prefix( path.data(), path.size(), (const char*)leaf->key.data(), leaf->key.size())) { if (watchman::CookieSync::isPossiblyACookie(path)) { return false; } // Yes: the pre-existing entry higher up in the tree obsoletes this // one that we would add now. w_log( W_LOG_DBG, "is_obsoleted: SKIP %.*s is obsoleted by %.*s\n", int(path.size()), path.data(), int(p->path.size()), p->path.data()); return true; } return false; }
// This is the iterator callback we use to prune out obsoleted leaves. // We need to compare the prefix to make sure that we don't delete // a sibling node by mistake (see commentary on the is_path_prefix // function for more on that). int PendingCollectionBase::iterContext::operator()( const w_string& key, std::shared_ptr<watchman_pending_fs>& p) { if (!p) { // It was removed; update the tree to reflect this coll.tree_.erase(key); // Stop iteration: we deleted something and invalidated the iterators. return 1; } if ((p->flags & W_PENDING_CRAWL_ONLY) == 0 && key.size() > root.size() && is_path_prefix( (const char*)key.data(), key.size(), root.data(), root.size()) && !watchman::CookieSync::isPossiblyACookie(p->path)) { w_log( W_LOG_DBG, "delete_kids: removing (%d) %.*s from pending because it is " "obsoleted by (%d) %.*s\n", int(p->path.size()), int(p->path.size()), p->path.data(), int(root.size()), int(root.size()), root.data()); // Unlink the child from the pending index. coll.unlinkItem(p); // Remove it from the art tree. coll.tree_.erase(key); // Stop iteration because we just invalidated the iterator state // by modifying the tree mid-iteration. return 1; } return 0; }
/* add a pending entry. Will consolidate an existing entry with the * same name. Returns false if an allocation fails. * The caller must own the collection lock. */ bool PendingCollectionBase::add( const w_string& path, struct timeval now, int flags) { char flags_label[128]; auto existing = tree_.search(path); if (existing) { /* Entry already exists: consolidate */ consolidateItem(existing->get(), flags); /* all done */ return true; } if (isObsoletedByContainingDir(path)) { return true; } // Try to allocate the new node before we prune any children. auto p = std::make_shared<watchman_pending_fs>(path, now, flags); maybePruneObsoletedChildren(path, flags); w_expand_flags(kflags, flags, flags_label, sizeof(flags_label)); w_log( W_LOG_DBG, "add_pending: %.*s %s\n", int(path.size()), path.data(), flags_label); tree_.insert(path, p); linkHead(std::move(p)); return true; }
// Crawl a single directory: (re-)establish the watch on it, enumerate its
// entries, queue new/changed children for analysis via processPath, and
// schedule re-processing for children that appear to have been deleted.
// `recursive` forces every child to be (re-)examined and propagates
// W_PENDING_RECURSIVE to queued work.  The caller holds the view and
// pending-collection locks (they arrive as LockedPtr&).
void InMemoryView::crawler(
    const std::shared_ptr<w_root_t>& root,
    SyncView::LockedPtr& view,
    PendingCollection::LockedPtr& coll,
    const w_string& dir_name,
    struct timeval now,
    bool recursive) {
  struct watchman_file* file;
  const watchman_dir_ent* dirent;
  // NOTE(review): `path` is filled by an unchecked memcpy below; this
  // assumes dir_name.size() < WATCHMAN_NAME_MAX — confirm callers
  // guarantee that bound.
  char path[WATCHMAN_NAME_MAX];
  bool stat_all = false;

  if (watcher_->flags & WATCHER_HAS_PER_FILE_NOTIFICATIONS) {
    // Per-file watchers normally don't need us to stat everything, except
    // when renames are coalesced into a single notification.
    stat_all = watcher_->flags & WATCHER_COALESCED_RENAME;
  } else {
    // If the watcher doesn't give us per-file notifications for
    // watched dirs, then we'll end up explicitly tracking them
    // and will get updates for the files explicitly.
    // We don't need to look at the files again when we crawl
    stat_all = false;
  }

  // true => create the dir node if it isn't already in the view.
  auto dir = resolveDir(view, dir_name, true);

  // Detect root directory replacement.
  // The inode number check is handled more generally by the sister code
  // in stat.cpp.  We need to special case it for the root because we never
  // generate a watchman_file node for the root and thus never call
  // InMemoryView::statPath (we'll fault if we do!).
  // Ideally the kernel would have given us a signal when we've been replaced
  // but some filesystems (eg: BTRFS) do not emit appropriate inotify events
  // for things like subvolume deletes.  We've seen situations where the
  // root has been replaced and we got no notifications at all and this has
  // left the cookie sync mechanism broken forever.
  if (dir_name == root->root_path) {
    try {
      auto st = getFileInformation(dir_name.c_str(), root->case_sensitive);
      if (st.ino != view->rootInode) {
        // If it still exists and the inode doesn't match, then we need
        // to force recrawl to make sure we're in sync.
        // We're lazily initializing the rootInode to 0 here, so we don't
        // need to do this the first time through (we're already crawling
        // everything in that case).
        if (view->rootInode != 0) {
          root->scheduleRecrawl(
              "root was replaced and we didn't get notified by the kernel");
          return;
        }
        recursive = true;
        view->rootInode = st.ino;
      }
    } catch (const std::system_error& err) {
      // Couldn't stat the root itself: treat it as gone.
      handle_open_errno(root, dir, now, "getFileInformation", err.code());
      markDirDeleted(view, dir, now, true);
      return;
    }
  }

  // Build a NUL-terminated copy of the dir name for the C-style APIs below.
  memcpy(path, dir_name.data(), dir_name.size());
  path[dir_name.size()] = 0;

  w_log(
      W_LOG_DBG, "opendir(%s) recursive=%s\n", path, recursive ? "true" : "false");

  /* Start watching and open the dir for crawling.
   * Whether we open the dir prior to watching or after is watcher specific,
   * so the operations are rolled together in our abstraction */
  std::unique_ptr<watchman_dir_handle> osdir;
  try {
    osdir = watcher_->startWatchDir(root, dir, path);
  } catch (const std::system_error& err) {
    // Couldn't open/watch it: mark the whole dir (and children) deleted.
    handle_open_errno(root, dir, now, "opendir", err.code());
    markDirDeleted(view, dir, now, true);
    return;
  }

  if (dir->files.empty()) {
    // Pre-size our hash(es) if we can, so that we can avoid collisions
    // and re-hashing during initial crawl
    uint32_t num_dirs = 0;
#ifndef _WIN32
    struct stat st;
    int dfd = osdir->getFd();
    if (dfd != -1 && fstat(dfd, &st) == 0) {
      num_dirs = (uint32_t)st.st_nlink;
    }
#endif
    // st.st_nlink is usually number of dirs + 2 (., ..).
    // If it is less than 2 then it doesn't follow that convention.
    // We just pass it through for the dir size hint and the hash
    // table implementation will round that up to the next power of 2
    apply_dir_size_hint(
        dir,
        num_dirs,
        uint32_t(root->config.getInt("hint_num_files_per_dir", 64)));
  }

  /* flag for delete detection: anything not seen during the readdir pass
   * below keeps maybe_deleted == true and is re-queued at the end */
  for (auto& it : dir->files) {
    // NOTE: this `file` shadows the outer declaration on purpose; the
    // outer one is only used inside the readDir loop below.
    auto file = it.second.get();
    if (file->exists) {
      file->maybe_deleted = true;
    }
  }

  try {
    while ((dirent = osdir->readDir()) != nullptr) {
      // Don't follow parent/self links
      if (dirent->d_name[0] == '.' &&
          (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))) {
        continue;
      }

      // Queue it up for analysis if the file is newly existing
      w_string name(dirent->d_name, W_STRING_BYTE);
      file = dir->getChildFile(name);
      if (file) {
        // Seen during this pass, so it is not deleted.
        file->maybe_deleted = false;
      }
      if (!file || !file->exists || stat_all || recursive) {
        auto full_path = dir->getFullPathToChild(name);
        w_log(
            W_LOG_DBG,
            "in crawler calling process_path on %s\n",
            full_path.c_str());
        processPath(
            root,
            view,
            coll,
            full_path,
            now,
            // Newly-appeared entries are crawled recursively too.
            ((recursive || !file || !file->exists) ? W_PENDING_RECURSIVE : 0),
            dirent);
      }
    }
  } catch (const std::system_error& exc) {
    // Reading the dir failed partway; queue it for another attempt rather
    // than losing track of it.
    log(ERR,
        "Error while reading dir ",
        path,
        ": ",
        exc.what(),
        ", re-adding to pending list to re-assess\n");
    coll->add(path, now, 0);
  }
  osdir.reset();

  // Anything still in maybe_deleted is actually deleted.
  // Arrange to re-process it shortly
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists &&
        (file->maybe_deleted || (file->stat.isDir() && recursive))) {
      coll->add(
          dir,
          file->getName().data(),
          now,
          recursive ? W_PENDING_RECURSIVE : 0);
    }
  }
}