Beispiel #1
0
// if there are any entries that are obsoleted by a recursive insert,
// walk over them now and mark them as ignored.
void PendingCollectionBase::maybePruneObsoletedChildren(
    w_string path,
    int flags) {
  if ((flags & (W_PENDING_RECURSIVE | W_PENDING_CRAWL_ONLY)) ==
      W_PENDING_RECURSIVE) {
    iterContext ctx{path, *this};
    uint32_t pruned = 0;
    // Since deletion invalidates the iterator, we need to repeatedly
    // call this to prune out the nodes.  It will return 0 once no
    // matching prefixes are found and deleted.
    while (tree_.iterPrefix((const uint8_t*)path.data(), path.size(), ctx)) {
      // OK; try again
      ++pruned;
    }

    if (pruned) {
      w_log(
          W_LOG_DBG,
          "maybePruneObsoletedChildren: pruned %u nodes under (%d) %.*s\n",
          pruned,
          int(path.size()),
          int(path.size()),
          path.data());
    }
  }
}
Beispiel #2
0
watchman_root::watchman_root(const w_string& root_path)
    : root_path(root_path),
      case_sensitive(is_case_sensitive_filesystem(root_path.c_str())),
      cookies(root_path),
      config_file(load_root_config(root_path.c_str())),
      config(config_file),
      trigger_settle(int(config.getInt("settle", DEFAULT_SETTLE_PERIOD))),
      gc_interval(
          int(config.getInt("gc_interval_seconds", DEFAULT_GC_INTERVAL))),
      gc_age(int(config.getInt("gc_age_seconds", DEFAULT_GC_AGE))),
      idle_reap_age(
          int(config.getInt("idle_reap_age_seconds", DEFAULT_REAP_AGE))),
      unilateralResponses(std::make_shared<watchman::Publisher>()) {
  ++live_roots;
  applyIgnoreConfiguration();
  applyIgnoreVCSConfiguration();
  init();
}
Beispiel #3
0
// Given a target of the form "absolute_path/filename", return
// realpath(absolute_path) + filename, where realpath(absolute_path) resolves
// all the symlinks in absolute_path.
static w_string get_normalized_target(const w_string& target) {
  int err;

  w_assert(
      w_string_path_is_absolute(target),
      "get_normalized_target: path %s is not absolute\n",
      target.c_str());

  auto dir_name = target.dirName();
  auto dir_name_real = realPath(dir_name.c_str());
  err = errno;

  if (dir_name_real) {
    auto file_name = target.baseName();
    return w_string::pathCat({dir_name_real, file_name});
  }

  errno = err;
  return nullptr;
}
Beispiel #4
0
// Check the tree to see if there is a path that is earlier/higher in the
// filesystem than the input path; if there is, and it is recursive,
// return true to indicate that there is no need to track this new path
// due to the already scheduled higher level path.
bool PendingCollectionBase::isObsoletedByContainingDir(const w_string& path) {
  auto leaf = tree_.longestMatch((const uint8_t*)path.data(), path.size());
  if (!leaf) {
    return false;
  }
  auto p = leaf->value;

  if ((p->flags & W_PENDING_RECURSIVE) && is_path_prefix(
                                              path.data(),
                                              path.size(),
                                              (const char*)leaf->key.data(),
                                              leaf->key.size())) {
    if (watchman::CookieSync::isPossiblyACookie(path)) {
      return false;
    }

    // Yes: the pre-existing entry higher up in the tree obsoletes this
    // one that we would add now.
    w_log(
        W_LOG_DBG,
        "is_obsoleted: SKIP %.*s is obsoleted by %.*s\n",
        int(path.size()),
        path.data(),
        int(p->path.size()),
        p->path.data());
    return true;
  }
  return false;
}
Beispiel #5
0
// This is the iterator callback we use to prune out obsoleted leaves.
// We need to compare the prefix to make sure that we don't delete
// a sibling node by mistake (see commentary on the is_path_prefix
// function for more on that).
int PendingCollectionBase::iterContext::operator()(
    const w_string& key,
    std::shared_ptr<watchman_pending_fs>& p) {
  if (!p) {
    // It was removed; update the tree to reflect this
    coll.tree_.erase(key);
    // Stop iteration: we deleted something and invalidated the iterators.
    return 1;
  }

  if ((p->flags & W_PENDING_CRAWL_ONLY) == 0 && key.size() > root.size() &&
      is_path_prefix(
          (const char*)key.data(), key.size(), root.data(), root.size()) &&
      !watchman::CookieSync::isPossiblyACookie(p->path)) {
    w_log(
        W_LOG_DBG,
        "delete_kids: removing (%d) %.*s from pending because it is "
        "obsoleted by (%d) %.*s\n",
        int(p->path.size()),
        int(p->path.size()),
        p->path.data(),
        int(root.size()),
        int(root.size()),
        root.data());

    // Unlink the child from the pending index.
    coll.unlinkItem(p);

    // Remove it from the art tree.
    coll.tree_.erase(key);

    // Stop iteration because we just invalidated the iterator state
    // by modifying the tree mid-iteration.
    return 1;
  }

  return 0;
}
Beispiel #6
0
/* add a pending entry.  Will consolidate an existing entry with the
 * same name.  Returns false if an allocation fails.
 * The caller must own the collection lock. */
bool PendingCollectionBase::add(
    const w_string& path,
    struct timeval now,
    int flags) {
  char flags_label[128];

  auto existing = tree_.search(path);
  if (existing) {
    /* Entry already exists: consolidate */
    consolidateItem(existing->get(), flags);
    /* all done */
    return true;
  }

  if (isObsoletedByContainingDir(path)) {
    return true;
  }

  // Try to allocate the new node before we prune any children.
  auto p = std::make_shared<watchman_pending_fs>(path, now, flags);

  maybePruneObsoletedChildren(path, flags);

  w_expand_flags(kflags, flags, flags_label, sizeof(flags_label));
  w_log(
      W_LOG_DBG,
      "add_pending: %.*s %s\n",
      int(path.size()),
      path.data(),
      flags_label);

  tree_.insert(path, p);
  linkHead(std::move(p));

  return true;
}
Beispiel #7
0
// Requires target to be an absolute path
static void watch_symlink_target(const w_string& target, json_t* root_files) {

  w_assert(
      w_string_path_is_absolute(target),
      "watch_symlink_target: path %s is not absolute\n",
      target.c_str());

  w_string normalized_target;
  try {
    normalized_target = get_normalized_target(target);
  } catch (const std::system_error& exc) {
    watchman::log(
        watchman::ERR,
        "watch_symlink_target: unable to get normalized version of target `",
        target,
        "`; realpath ",
        exc.what(),
        "\n");
    return;
  }

  w_string_piece relpath;
  w_string_piece watched_root;
  bool enclosing = findEnclosingRoot(normalized_target, watched_root, relpath);
  if (!enclosing) {
    w_string_piece resolved(normalized_target);

    if (!find_project_root(root_files, resolved, relpath)) {
      watchman::log(
          watchman::ERR,
          "watch_symlink_target: No watchable root for ",
          resolved, "\n");
    } else {
      char* errmsg = nullptr;
      SCOPE_EXIT{
        free(errmsg);
      };
      auto root = w_root_resolve(resolved.asWString().c_str(), true, &errmsg);
      if (!root) {
        watchman::log(
            watchman::ERR,
            "watch_symlink_target: unable to watch ",
            resolved, ": ", errmsg, "\n");
      }
    }
  }
Beispiel #8
0
void InMemoryView::crawler(
    const std::shared_ptr<w_root_t>& root,
    SyncView::LockedPtr& view,
    PendingCollection::LockedPtr& coll,
    const w_string& dir_name,
    struct timeval now,
    bool recursive) {
  struct watchman_file *file;
  const watchman_dir_ent* dirent;
  char path[WATCHMAN_NAME_MAX];
  bool stat_all = false;

  if (watcher_->flags & WATCHER_HAS_PER_FILE_NOTIFICATIONS) {
    stat_all = watcher_->flags & WATCHER_COALESCED_RENAME;
  } else {
    // If the watcher doesn't give us per-file notifications for
    // watched dirs, then we'll end up explicitly tracking them
    // and will get updates for the files explicitly.
    // We don't need to look at the files again when we crawl
    stat_all = false;
  }

  auto dir = resolveDir(view, dir_name, true);

  // Detect root directory replacement.
  // The inode number check is handled more generally by the sister code
  // in stat.cpp.  We need to special case it for the root because we never
  // generate a watchman_file node for the root and thus never call
  // InMemoryView::statPath (we'll fault if we do!).
  // Ideally the kernel would have given us a signal when we've been replaced
  // but some filesystems (eg: BTRFS) do not emit appropriate inotify events
  // for things like subvolume deletes.  We've seen situations where the
  // root has been replaced and we got no notifications at all and this has
  // left the cookie sync mechanism broken forever.
  if (dir_name == root->root_path) {
    try {
      auto st = getFileInformation(dir_name.c_str(), root->case_sensitive);
      if (st.ino != view->rootInode) {
        // If it still exists and the inode doesn't match, then we need
        // to force recrawl to make sure we're in sync.
        // We're lazily initializing the rootInode to 0 here, so we don't
        // need to do this the first time through (we're already crawling
        // everything in that case).
        if (view->rootInode != 0) {
          root->scheduleRecrawl(
              "root was replaced and we didn't get notified by the kernel");
          return;
        }
        recursive = true;
        view->rootInode = st.ino;
      }
    } catch (const std::system_error& err) {
      handle_open_errno(root, dir, now, "getFileInformation", err.code());
      markDirDeleted(view, dir, now, true);
      return;
    }
  }

  memcpy(path, dir_name.data(), dir_name.size());
  path[dir_name.size()] = 0;

  w_log(W_LOG_DBG, "opendir(%s) recursive=%s\n",
      path, recursive ? "true" : "false");

  /* Start watching and open the dir for crawling.
   * Whether we open the dir prior to watching or after is watcher specific,
   * so the operations are rolled together in our abstraction */
  std::unique_ptr<watchman_dir_handle> osdir;

  try {
    osdir = watcher_->startWatchDir(root, dir, path);
  } catch (const std::system_error& err) {
    handle_open_errno(root, dir, now, "opendir", err.code());
    markDirDeleted(view, dir, now, true);
    return;
  }

  if (dir->files.empty()) {
    // Pre-size our hash(es) if we can, so that we can avoid collisions
    // and re-hashing during initial crawl
    uint32_t num_dirs = 0;
#ifndef _WIN32
    struct stat st;
    int dfd = osdir->getFd();
    if (dfd != -1 && fstat(dfd, &st) == 0) {
      num_dirs = (uint32_t)st.st_nlink;
    }
#endif
    // st.st_nlink is usually number of dirs + 2 (., ..).
    // If it is less than 2 then it doesn't follow that convention.
    // We just pass it through for the dir size hint and the hash
    // table implementation will round that up to the next power of 2
    apply_dir_size_hint(
        dir,
        num_dirs,
        uint32_t(root->config.getInt("hint_num_files_per_dir", 64)));
  }

  /* flag for delete detection */
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists) {
      file->maybe_deleted = true;
    }
  }

  try {
    while ((dirent = osdir->readDir()) != nullptr) {
      // Don't follow parent/self links
      if (dirent->d_name[0] == '.' &&
          (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))) {
        continue;
      }

      // Queue it up for analysis if the file is newly existing
      w_string name(dirent->d_name, W_STRING_BYTE);
      file = dir->getChildFile(name);
      if (file) {
        file->maybe_deleted = false;
      }
      if (!file || !file->exists || stat_all || recursive) {
        auto full_path = dir->getFullPathToChild(name);
        w_log(
            W_LOG_DBG,
            "in crawler calling process_path on %s\n",
            full_path.c_str());
        processPath(
            root,
            view,
            coll,
            full_path,
            now,
            ((recursive || !file || !file->exists) ? W_PENDING_RECURSIVE : 0),
            dirent);
      }
    }
  } catch (const std::system_error& exc) {
    log(ERR,
        "Error while reading dir ",
        path,
        ": ",
        exc.what(),
        ", re-adding to pending list to re-assess\n");
    coll->add(path, now, 0);
  }
  osdir.reset();

  // Anything still in maybe_deleted is actually deleted.
  // Arrange to re-process it shortly
  for (auto& it : dir->files) {
    auto file = it.second.get();
    if (file->exists &&
        (file->maybe_deleted || (file->stat.isDir() && recursive))) {
      coll->add(
          dir,
          file->getName().data(),
          now,
          recursive ? W_PENDING_RECURSIVE : 0);
    }
  }
}
Beispiel #9
0
  static std::unique_ptr<QueryExpr>
  parse(w_query*, const json_ref& term, CaseSensitivity caseSensitive) {
    const char *pattern = nullptr, *scope = "basename";
    const char *which =
        caseSensitive == CaseSensitivity::CaseInSensitive ? "iname" : "name";
    std::unordered_set<w_string> set;

    if (!term.isArray()) {
      throw QueryParseError("Expected array for '", which, "' term");
    }

    if (json_array_size(term) > 3) {
      throw QueryParseError(
          "Invalid number of arguments for '", which, "' term");
    }

    if (json_array_size(term) == 3) {
      const auto& jscope = term.at(2);
      if (!jscope.isString()) {
        throw QueryParseError("Argument 3 to '", which, "' must be a string");
      }

      scope = json_string_value(jscope);

      if (strcmp(scope, "basename") && strcmp(scope, "wholename")) {
        throw QueryParseError(
            "Invalid scope '", scope, "' for ", which, " expression");
      }
    }

    const auto& name = term.at(1);

    if (name.isArray()) {
      uint32_t i;

      for (i = 0; i < json_array_size(name); i++) {
        if (!json_array_get(name, i).isString()) {
          throw QueryParseError(
              "Argument 2 to '",
              which,
              "' must be either a string or an array of string");
        }
      }

      set.reserve(json_array_size(name));
      for (i = 0; i < json_array_size(name); i++) {
        w_string element;
        const auto& jele = name.at(i);
        auto ele = json_to_w_string(jele);

        if (caseSensitive == CaseSensitivity::CaseInSensitive) {
          element = ele.piece().asLowerCase(ele.type()).normalizeSeparators();
        } else {
          element = ele.normalizeSeparators();
        }

        set.insert(element);
      }

    } else if (name.isString()) {
      pattern = json_string_value(name);
    } else {
      throw QueryParseError(
          "Argument 2 to '",
          which,
          "' must be either a string or an array of string");
    }

    auto data = new NameExpr(std::move(set), caseSensitive,
                             !strcmp(scope, "wholename"));

    if (pattern) {
      data->name = json_to_w_string(name).normalizeSeparators();
    }

    return std::unique_ptr<QueryExpr>(data);
  }