Example #1
0
// if there are any entries that are obsoleted by a recursive insert,
// walk over them now and mark them as ignored.
void PendingCollectionBase::maybePruneObsoletedChildren(
    w_string path,
    int flags) {
  // Only a purely recursive insert obsoletes children; a crawl-only
  // recursive entry does not.
  if ((flags & (W_PENDING_RECURSIVE | W_PENDING_CRAWL_ONLY)) !=
      W_PENDING_RECURSIVE) {
    return;
  }

  iterContext ctx{path, *this};
  uint32_t prunedCount = 0;
  // Deleting a node invalidates the prefix iterator, so we restart
  // iterPrefix after every deletion.  It returns 0 once nothing under
  // the prefix matches (and gets deleted) any more.
  while (tree_.iterPrefix((const uint8_t*)path.data(), path.size(), ctx)) {
    // Something was removed; scan again.
    ++prunedCount;
  }

  if (prunedCount != 0) {
    w_log(
        W_LOG_DBG,
        "maybePruneObsoletedChildren: pruned %u nodes under (%d) %.*s\n",
        prunedCount,
        int(path.size()),
        int(path.size()),
        path.data());
  }
}
Example #2
0
// Check the tree to see if there is a path that is earlier/higher in the
// filesystem than the input path; if there is, and it is recursive,
// return true to indicate that there is no need to track this new path
// due to the already scheduled higher level path.
bool PendingCollectionBase::isObsoletedByContainingDir(const w_string& path) {
  // Find the longest key in the tree that is a prefix of `path`.
  auto leaf = tree_.longestMatch((const uint8_t*)path.data(), path.size());
  if (!leaf) {
    // Nothing pending above us in the tree.
    return false;
  }
  // Borrow the entry rather than copying; copying the shared_ptr would
  // incur an unnecessary atomic refcount increment/decrement.
  const auto& p = leaf->value;

  if ((p->flags & W_PENDING_RECURSIVE) && is_path_prefix(
                                              path.data(),
                                              path.size(),
                                              (const char*)leaf->key.data(),
                                              leaf->key.size())) {
    // Never skip cookie files; the cookie sync mechanism needs to
    // observe them individually.
    if (watchman::CookieSync::isPossiblyACookie(path)) {
      return false;
    }

    // Yes: the pre-existing entry higher up in the tree obsoletes this
    // one that we would add now.
    w_log(
        W_LOG_DBG,
        "is_obsoleted: SKIP %.*s is obsoleted by %.*s\n",
        int(path.size()),
        path.data(),
        int(p->path.size()),
        p->path.data());
    return true;
  }
  return false;
}
Example #3
0
// This is the iterator callback we use to prune out obsoleted leaves.
// We need to compare the prefix to make sure that we don't delete
// a sibling node by mistake (see commentary on the is_path_prefix
// function for more on that).
int PendingCollectionBase::iterContext::operator()(
    const w_string& key,
    std::shared_ptr<watchman_pending_fs>& p) {
  if (!p) {
    // It was removed; update the tree to reflect this
    coll.tree_.erase(key);
    // Stop iteration: we deleted something and invalidated the iterators.
    return 1;
  }

  // An entry is prunable only when all of these hold: it is not a
  // crawl-only entry, it is a strict descendant of root (longer than
  // root and a true path prefix -- not root itself and not a sibling
  // that merely shares a name prefix), and it is not a cookie file.
  const bool crawlOnly = (p->flags & W_PENDING_CRAWL_ONLY) != 0;
  const bool strictDescendant = key.size() > root.size() &&
      is_path_prefix(
          (const char*)key.data(), key.size(), root.data(), root.size());
  if (crawlOnly || !strictDescendant ||
      watchman::CookieSync::isPossiblyACookie(p->path)) {
    // Keep this entry and continue iterating.
    return 0;
  }

  w_log(
      W_LOG_DBG,
      "delete_kids: removing (%d) %.*s from pending because it is "
      "obsoleted by (%d) %.*s\n",
      int(p->path.size()),
      int(p->path.size()),
      p->path.data(),
      int(root.size()),
      int(root.size()),
      root.data());

  // Unlink the child from the pending index.
  coll.unlinkItem(p);

  // Remove it from the art tree.
  coll.tree_.erase(key);

  // Stop iteration because we just invalidated the iterator state
  // by modifying the tree mid-iteration.
  return 1;
}
Example #4
0
/* add a pending entry.  Will consolidate an existing entry with the
 * same name.  Returns false if an allocation fails.
 * The caller must own the collection lock. */
bool PendingCollectionBase::add(
    const w_string& path,
    struct timeval now,
    int flags) {
  auto existing = tree_.search(path);
  if (existing) {
    // Entry already exists: merge the flags into it and we're done.
    consolidateItem(existing->get(), flags);
    return true;
  }

  // A recursive entry higher up in the tree already covers this path,
  // so there is nothing to record.
  if (isObsoletedByContainingDir(path)) {
    return true;
  }

  // Try to allocate the new node before we prune any children.
  auto node = std::make_shared<watchman_pending_fs>(path, now, flags);

  maybePruneObsoletedChildren(path, flags);

  // Render the flags in symbolic form for the debug log.
  char flags_label[128];
  w_expand_flags(kflags, flags, flags_label, sizeof(flags_label));
  w_log(
      W_LOG_DBG,
      "add_pending: %.*s %s\n",
      int(path.size()),
      path.data(),
      flags_label);

  // Index the entry by path and link it at the head of the pending list.
  tree_.insert(path, node);
  linkHead(std::move(node));

  return true;
}
Example #5
0
// Crawl the on-disk contents of dir_name and reconcile them against our
// in-memory view.  New, changed, or possibly-deleted entries are queued
// into `coll` for later analysis; `recursive` forces every child to be
// re-examined.  The caller holds the view and collection locks.
void InMemoryView::crawler(
    const std::shared_ptr<w_root_t>& root,
    SyncView::LockedPtr& view,
    PendingCollection::LockedPtr& coll,
    const w_string& dir_name,
    struct timeval now,
    bool recursive) {
  struct watchman_file *file;
  const watchman_dir_ent* dirent;
  // NOTE(review): dir_name is memcpy'd into this fixed-size buffer below
  // with no bounds check -- assumes dir_name.size() < WATCHMAN_NAME_MAX.
  char path[WATCHMAN_NAME_MAX];
  bool stat_all = false;

  if (watcher_->flags & WATCHER_HAS_PER_FILE_NOTIFICATIONS) {
    // A watcher that coalesces renames may not report every affected
    // file individually, so in that case we stat everything we see.
    stat_all = watcher_->flags & WATCHER_COALESCED_RENAME;
  } else {
    // If the watcher doesn't give us per-file notifications for
    // watched dirs, then we'll end up explicitly tracking them
    // and will get updates for the files explicitly.
    // We don't need to look at the files again when we crawl
    stat_all = false;
  }

  // Look up (and create, if needed) the dir node for this path.
  auto dir = resolveDir(view, dir_name, true);

  // Detect root directory replacement.
  // The inode number check is handled more generally by the sister code
  // in stat.cpp.  We need to special case it for the root because we never
  // generate a watchman_file node for the root and thus never call
  // InMemoryView::statPath (we'll fault if we do!).
  // Ideally the kernel would have given us a signal when we've been replaced
  // but some filesystems (eg: BTRFS) do not emit appropriate inotify events
  // for things like subvolume deletes.  We've seen situations where the
  // root has been replaced and we got no notifications at all and this has
  // left the cookie sync mechanism broken forever.
  if (dir_name == root->root_path) {
    try {
      auto st = getFileInformation(dir_name.c_str(), root->case_sensitive);
      if (st.ino != view->rootInode) {
        // If it still exists and the inode doesn't match, then we need
        // to force recrawl to make sure we're in sync.
        // We're lazily initializing the rootInode to 0 here, so we don't
        // need to do this the first time through (we're already crawling
        // everything in that case).
        if (view->rootInode != 0) {
          root->scheduleRecrawl(
              "root was replaced and we didn't get notified by the kernel");
          return;
        }
        recursive = true;
        view->rootInode = st.ino;
      }
    } catch (const std::system_error& err) {
      // The root itself is inaccessible; record the error and mark the
      // whole dir (recursively) as deleted.
      handle_open_errno(root, dir, now, "getFileInformation", err.code());
      markDirDeleted(view, dir, now, true);
      return;
    }
  }

  // Build a NUL-terminated C string copy of the path for the watcher
  // and logging calls below.
  memcpy(path, dir_name.data(), dir_name.size());
  path[dir_name.size()] = 0;

  w_log(W_LOG_DBG, "opendir(%s) recursive=%s\n",
      path, recursive ? "true" : "false");

  /* Start watching and open the dir for crawling.
   * Whether we open the dir prior to watching or after is watcher specific,
   * so the operations are rolled together in our abstraction */
  std::unique_ptr<watchman_dir_handle> osdir;

  try {
    osdir = watcher_->startWatchDir(root, dir, path);
  } catch (const std::system_error& err) {
    // Couldn't open/watch it; treat the dir (and its children) as gone.
    handle_open_errno(root, dir, now, "opendir", err.code());
    markDirDeleted(view, dir, now, true);
    return;
  }

  if (dir->files.empty()) {
    // Pre-size our hash(es) if we can, so that we can avoid collisions
    // and re-hashing during initial crawl
    uint32_t num_dirs = 0;
#ifndef _WIN32
    struct stat st;
    int dfd = osdir->getFd();
    if (dfd != -1 && fstat(dfd, &st) == 0) {
      num_dirs = (uint32_t)st.st_nlink;
    }
#endif
    // st.st_nlink is usually number of dirs + 2 (., ..).
    // If it is less than 2 then it doesn't follow that convention.
    // We just pass it through for the dir size hint and the hash
    // table implementation will round that up to the next power of 2
    apply_dir_size_hint(
        dir,
        num_dirs,
        uint32_t(root->config.getInt("hint_num_files_per_dir", 64)));
  }

  /* flag for delete detection */
  // Mark every known-existing child as maybe_deleted; the readdir loop
  // below clears the flag for entries that are still present on disk.
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists) {
      file->maybe_deleted = true;
    }
  }

  try {
    while ((dirent = osdir->readDir()) != nullptr) {
      // Don't follow parent/self links
      if (dirent->d_name[0] == '.' &&
          (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))) {
        continue;
      }

      // Queue it up for analysis if the file is newly existing
      w_string name(dirent->d_name, W_STRING_BYTE);
      file = dir->getChildFile(name);
      if (file) {
        // Still present on disk: not deleted after all.
        file->maybe_deleted = false;
      }
      if (!file || !file->exists || stat_all || recursive) {
        auto full_path = dir->getFullPathToChild(name);
        w_log(
            W_LOG_DBG,
            "in crawler calling process_path on %s\n",
            full_path.c_str());
        processPath(
            root,
            view,
            coll,
            full_path,
            now,
            // Unknown or previously-deleted entries get a recursive
            // crawl so any subtree below them is picked up too.
            ((recursive || !file || !file->exists) ? W_PENDING_RECURSIVE : 0),
            dirent);
      }
    }
  } catch (const std::system_error& exc) {
    // Reading the dir failed part-way; queue the dir itself so a later
    // pass can re-assess it rather than losing track of it.
    log(ERR,
        "Error while reading dir ",
        path,
        ": ",
        exc.what(),
        ", re-adding to pending list to re-assess\n");
    coll->add(path, now, 0);
  }
  osdir.reset();

  // Anything still in maybe_deleted is actually deleted.
  // Arrange to re-process it shortly
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists &&
        (file->maybe_deleted || (file->stat.isDir() && recursive))) {
      coll->add(
          dir,
          file->getName().data(),
          now,
          recursive ? W_PENDING_RECURSIVE : 0);
    }
  }
}