Exemple #1
0
  // Handles the loss of the connection identified by `_connectionId`;
  // `failure` is the reason that gets logged.
  //
  // A notification that does not refer to the current `connectionId` is
  // ignored. Otherwise the `disconnected` callback is scheduled (only if we
  // were connected, subscribing or subscribed), `disconnect()` is invoked,
  // and we then either arm the recovery timer and back off (checkpointing
  // enabled and we were connected) or shut down.
  void disconnected(
      const UUID& _connectionId,
      const string& failure)
  {
    // A notification raised for a connection other than the current one
    // tells us nothing about the current one, so drop it.
    if (connectionId != _connectionId) {
      VLOG(1) << "Ignoring disconnection attempt from stale connection";
      return;
    }

    CHECK_NE(DISCONNECTED, state);

    VLOG(1) << "Disconnected from agent: " << failure;

    // Sampled here, before `disconnect()` is called further down.
    const bool wasConnected =
      state == CONNECTED || state == SUBSCRIBING || state == SUBSCRIBED;

    // The user is only told about the disconnection when we are leaving one
    // of the connected states, i.e. not again for every failed retry.
    if (wasConnected) {
      mutex.lock()
        .then(defer(self(), [this]() {
          return async(callbacks.disconnected);
        }))
        .onAny(lambda::bind(&Mutex::unlock, mutex));
    }

    // Tear down whatever connections are still active.
    disconnect();

    // A pending recovery timer means the original disconnection was already
    // observed and this is merely a failed (re-)connection attempt; the
    // timer armed back then must keep running untouched.
    if (recoveryTimer.isSome()) {
      CHECK(checkpoint);

      return;
    }

    // Without framework checkpointing, or if we never got connected in the
    // first place, there is nothing to recover.
    if (!checkpoint || !wasConnected) {
      shutdown();
      return;
    }

    CHECK_SOME(recoveryTimeout);
    CHECK_NONE(recoveryTimer);

    // Arm the recovery timeout exactly once per disconnection so that
    // unsuccessful (re-)connection attempts do not restart it.
    recoveryTimer = delay(
        recoveryTimeout.get(),
        self(),
        &Self::_recoveryTimeout);

    // Back off and try to reconnect; only done when framework checkpointing
    // is enabled.
    backoff();
  }
Exemple #2
0
// Exercises `async` with free functions of different arity and return kind,
// and checks the value obtained from the returned future in each case.
//
// NOTE(review): `foo` .. `foo4`, `itoa1` and `itoa2` are defined outside this
// view; what they compute is only implied by the expected values below.
TEST(ProcessTest, Async)
{
  // NOTE(review): presumably required because `async` may run the functions
  // off the test thread -- confirm.
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  // Non-void functions taking zero through four arguments.
  EXPECT_EQ(1, async(&foo).get());
  EXPECT_EQ(10, async(&foo1, 10).get());
  EXPECT_EQ(30, async(&foo2, 10, 20).get());
  EXPECT_EQ(60, async(&foo3, 10, 20, 30).get());
  EXPECT_EQ(100, async(&foo4, 10, 20, 30, 40).get());

  // Non-void function taking a pointer argument.
  int i = 42;
  EXPECT_EQ("42", async(&itoa2, &i).get());

  // Non-void function that itself returns a future: the first `get()`
  // yields that inner future, the second one yields its value.
  EXPECT_EQ("42", async(&itoa1, &i).get().get());
}
Exemple #3
0
 // Passes the events accumulated so far to the `received` callback through
 // `async`, replaces `events` with an empty queue, and returns the future
 // produced by `async`.
 Future<Nothing> _receive()
 {
   // `events` must be handed to `async` before it is reset below.
   // NOTE(review): presumably `async` copies its arguments, so the reset
   // does not affect what the callback sees -- confirm.
   Future<Nothing> delivered = async(callbacks.received, events);

   events = queue<Event>();

   return delivered;
 }
Exemple #4
0
  void connected(
      const UUID& _connectionId,
      const Future<Connection>& connection1,
      const Future<Connection>& connection2)
  {
    // It is possible that the agent process failed while we have an ongoing
    // (re-)connection attempt with the agent.
    if (connectionId != _connectionId) {
      VLOG(1) << "Ignoring connection attempt from stale connection";
      return;
    }

    CHECK_EQ(CONNECTING, state);
    CHECK_SOME(connectionId);

    if (!connection1.isReady()) {
      disconnected(connectionId.get(),
                   connection1.isFailed()
                     ? connection1.failure()
                     : "Subscribe future discarded");
      return;
    }

    if (!connection2.isReady()) {
      disconnected(connectionId.get(),
                   connection2.isFailed()
                     ? connection2.failure()
                     : "Non-subscribe future discarded");
      return;
    }

    VLOG(1) << "Connected with the agent";

    state = CONNECTED;

    connections = Connections {connection1.get(), connection2.get()};

    connections->subscribe.disconnected()
      .onAny(defer(self(),
                   &Self::disconnected,
                   connectionId.get(),
                   "Subscribe connection interrupted"));

    connections->nonSubscribe.disconnected()
      .onAny(defer(self(),
                   &Self::disconnected,
                   connectionId.get(),
                   "Non-subscribe connection interrupted"));

    // Cancel the recovery timer if we connected after a disconnection with the
    // agent when framework checkpointing is enabled. This ensures that we have
    // only one active timer instance at a given point of time.
    if (recoveryTimer.isSome()) {
      CHECK(checkpoint);

      Clock::cancel(recoveryTimer.get());
      recoveryTimer = None();
    }

    // Invoke the connected callback once we have established both subscribe
    // and non-subscribe connections with the agent.
    mutex.lock()
      .then(defer(self(), [this]() {
        return async(callbacks.connected);
      }))
      .onAny(lambda::bind(&Mutex::unlock, mutex));
  }
  // Frees the gids allocated for `path` and for every tracked volume whose
  // path starts with `path`. There are two callers:
  //   1. The agent destroying a shared persistent volume; `path` is then the
  //      path of that volume and matches an entry in `infos` exactly.
  //   2. The containerizer destroying a container; `path` is then the
  //      container's sandbox, and the entries of interest are the PARENT
  //      type SANDBOX_PATH volumes, which are subdirectories of the parent
  //      container's sandbox.
  //
  // For the volumes of case 2 the owner group is additionally reset to the
  // primary group of the volume's owner. Failures to do so are only logged.
  // The returned future fails only if the updated state cannot be persisted.
  Future<Nothing> deallocate(const string& path)
  {
    vector<string> nestedVolumes;
    bool dirty = false;

    for (auto it = infos.begin(); it != infos.end(); ) {
      const VolumeGidInfo& info = it->second;
      const string& volumePath = info.path();

      // Entries unrelated to `path` are left alone.
      if (!strings::startsWith(volumePath, path)) {
        ++it;
        continue;
      }

      // A strict prefix match is the PARENT type SANDBOX_PATH volume case.
      if (volumePath != path) {
        nestedVolumes.push_back(volumePath);
      }

      const gid_t gid = info.gid();

      LOG(INFO) << "Deallocated gid " << gid << " for the volume path '"
                << volumePath << "'";

      // The gid goes back into the free range only when it belongs to the
      // total range. It may not if the agent was restarted with a different
      // total range and the gid stems from the old one.
      if (totalGids.contains(gid)) {
        freeGids += gid;
        ++metrics.volume_gids_free;
      }

      it = infos.erase(it);
      dirty = true;
    }

    // A PARENT type SANDBOX_PATH volume outlives its container for a while
    // (depending on GC policy). Its gid may be handed to another volume in
    // the meantime, so switch the owner group back to the original one (the
    // primary group of the volume's owner) to avoid leaking the volume to
    // other containers.
    vector<Future<Try<Nothing>>> pending;
    vector<pair<string, gid_t>> targets;

    for (const string& volume : nestedVolumes) {
      // Look up the uid of the volume's owner.
      struct stat st;
      if (::stat(volume.c_str(), &st) < 0) {
        LOG(WARNING) << "Failed to stat '" << volume << "': "
                     << os::strerror(errno);

        continue;
      }

      Result<string> owner = os::user(st.st_uid);
      if (!owner.isSome()) {
        LOG(WARNING) << "Failed to get username for the uid " << st.st_uid
                     << ": "
                     << (owner.isError() ? owner.error() : "not found");

        continue;
      }

      // Resolve the primary group ID of that user.
      Result<gid_t> primaryGid = os::getgid(owner.get());
      if (!primaryGid.isSome()) {
        LOG(WARNING) << "Failed to get gid for the user '" << owner.get()
                     << "': "
                     << (primaryGid.isError() ? primaryGid.error()
                                              : "not found");

        continue;
      }

      // `pending` and `targets` are filled in lock step so that they can be
      // indexed together once the results are in.
      pending.push_back(
          async(&setVolumeOwnership, volume, primaryGid.get(), false));
      targets.emplace_back(volume, primaryGid.get());
    }

    return await(pending)
      .then(defer(
          self(),
          [=](const vector<Future<Try<Nothing>>>& results) -> Future<Nothing> {
            for (size_t i = 0; i < results.size(); ++i) {
              const Future<Try<Nothing>>& result = results[i];

              if (result.isReady() && !result->isError()) {
                continue;
              }

              // Either the future itself did not complete successfully or
              // the operation it carried reported an error.
              const string reason = !result.isReady()
                ? (result.isFailed() ? result.failure() : "discarded")
                : result->error();

              LOG(WARNING) << "Failed to set the owner group of the volume "
                           << "path '" << targets[i].first << "' back to "
                           << targets[i].second << ": " << reason;
            }

            // Nothing was removed from `infos`, so nothing to persist.
            if (!dirty) {
              return Nothing();
            }

            Try<Nothing> status = persist();
            if (status.isError()) {
              return Failure(
                  "Failed to save state of volume gid infos: " +
                  status.error());
            }

            return Nothing();
          }));
  }
  // Called when a container running as a non-root user tries to use a shared
  // persistent volume or a PARENT type SANDBOX_PATH volume; `path` is the
  // source path of the volume and `type` is recorded for a new allocation.
  //
  // Returns the gid associated with `path`:
  //   - If a gid is already recorded for `path`, that gid is returned (or
  //     the in-flight future if its ownership is still being set).
  //   - If the path's current group is already within the total gid range,
  //     that gid is returned without recording anything.
  //   - Otherwise a gid is taken from the free range, recorded, persisted,
  //     and the volume's ownership is set asynchronously.
  //
  // The returned future fails if `path` cannot be stat'ed, the free range is
  // exhausted, the state cannot be persisted, or setting ownership fails.
  Future<gid_t> allocate(const string& path, VolumeGidInfo::Type type)
  {
    gid_t gid;

    // If a gid has already been allocated for the specified path,
    // just return the gid.
    if (infos.contains(path)) {
      gid = infos[path].gid();

      LOG(INFO) << "Use the allocated gid " << gid << " of the volume path '"
                << path << "'";

      // If we are already setting ownership for the specified path, skip the
      // additional setting and hand out the in-flight result instead.
      if (setting.contains(path)) {
        return setting[path]->future();
      }
    } else {
      struct stat s;
      if (::stat(path.c_str(), &s) < 0) {
        return Failure("Failed to stat '" + path + "': " + os::strerror(errno));
      }

      // If the gid of the specified path is in the total gid range, just
      // return the gid. This could happen in the case that nested container
      // uses persistent volume, in which case we did a workaround in the
      // default executor to set up a volume mapping (i.e., map the persistent
      // volume to a PARENT type SANDBOX_PATH volume for the nested container)
      // so that the nested container can access the persistent volume.
      //
      // Please note that in the case of shared persistent volume, operator
      // should NOT restart agent with a different total gid range, otherwise
      // the gid of the shared persistent volume may be overwritten if a nested
      // container tries to use the shared persistent volume after the restart.
      if (totalGids.contains(s.st_gid)) {
        gid = s.st_gid;

        LOG(INFO) << "Use the gid " << gid << " for the volume path '" << path
                  << "' which should be the mount point of another volume "
                  << "which is actually allocated with the gid";
      } else {
        // Allocate a free gid to the specified path and then set the
        // ownership for it.
        if (freeGids.empty()) {
          return Failure(
              "Failed to allocate gid to the volume path '" + path +
              "' because the free gid range is exhausted");
        }

        gid = freeGids.begin()->lower();

        LOG(INFO) << "Allocating gid " << gid << " to the volume path '"
                  << path << "'";

        freeGids -= gid;
        --metrics.volume_gids_free;

        VolumeGidInfo info;
        info.set_type(type);
        info.set_path(path);
        info.set_gid(gid);

        infos.put(path, info);

        Try<Nothing> status = persist();
        if (status.isError()) {
          // Undo the in-memory allocation. Otherwise a later call for the
          // same path would take the "already allocated" branch above and
          // return this gid although the volume's ownership was never set,
          // and the gid would stay missing from the free range.
          infos.erase(path);
          freeGids += gid;
          ++metrics.volume_gids_free;

          return Failure(
              "Failed to save state of volume gid infos: " + status.error());
        }

        Owned<Promise<gid_t>> promise(new Promise<gid_t>());

        // Set the ownership asynchronously. Once it is done (successfully or
        // not) the `setting` entry is dropped on this process's own context.
        Future<gid_t> future = async(&setVolumeOwnership, path, gid, true)
          .then([path, gid](const Try<Nothing>& result) -> Future<gid_t> {
            if (result.isError()) {
              return Failure(
                  "Failed to set the owner group of the volume path '" + path +
                  "' to " + stringify(gid) + ": " + result.error());
            }

            return gid;
          })
          .onAny(defer(self(), [=](const Future<gid_t>&) {
            setting.erase(path);
          }));

        // Track the in-flight operation so that concurrent requests for the
        // same path share its result.
        promise->associate(future);
        setting[path] = promise;

        return promise->future();
      }
    }

    return gid;
  }