// This test verifies that the launch of new executor will result in // an unschedule of the framework work directory created by an old // executor. TEST_F(GarbageCollectorIntegrationTest, Unschedule) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); Future<SlaveRegisteredMessage> slaveRegistered = FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); ExecutorInfo executor1; // Bug in gcc 4.1.*, must assign on next line. executor1 = CREATE_EXECUTOR_INFO("executor-1", "exit 1"); ExecutorInfo executor2; // Bug in gcc 4.1.*, must assign on next line. executor2 = CREATE_EXECUTOR_INFO("executor-2", "exit 1"); MockExecutor exec1(executor1.executor_id()); MockExecutor exec2(executor2.executor_id()); hashmap<ExecutorID, Executor*> execs; execs[executor1.executor_id()] = &exec1; execs[executor2.executor_id()] = &exec2; TestContainerizer containerizer(execs); slave::Flags flags = CreateSlaveFlags(); Try<PID<Slave> > slave = StartSlave(&containerizer, flags); ASSERT_SOME(slave); AWAIT_READY(slaveRegistered); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Resources resources = Resources::parse(flags.resources.get()).get(); double cpus = resources.get<Value::Scalar>("cpus").get().value(); double mem = resources.get<Value::Scalar>("mem").get().value(); EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(LaunchTasks(executor1, 1, cpus, mem, "*")); EXPECT_CALL(exec1, registered(_, _, _, _)); EXPECT_CALL(exec1, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // TODO(benh/vinod): Would've been great to match the dispatch // against arguments here. // NOTE: Since Google Mock selects the last matching expectation // that is still active, the order of (un)schedule expectations // below are the reverse of the actual (un)schedule call order. // Schedule framework work directory. Future<Nothing> scheduleFrameworkWork = FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule); // Schedule top level executor work directory. Future<Nothing> scheduleExecutorWork = FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule); // Schedule executor run work directory. Future<Nothing> scheduleExecutorRunWork = FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule); // Unschedule framework work directory. Future<Nothing> unscheduleFrameworkWork = FUTURE_DISPATCH(_, &GarbageCollectorProcess::unschedule); // We ask the isolator to kill the first executor below. EXPECT_CALL(exec1, shutdown(_)) .Times(AtMost(1)); EXPECT_CALL(sched, statusUpdate(_, _)) .Times(AtMost(2)); // Once for a TASK_LOST then once for TASK_RUNNING. // We use the killed executor/tasks resources to run another task. EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(LaunchTasks(executor2, 1, cpus, mem, "*")); EXPECT_CALL(exec2, registered(_, _, _, _)); EXPECT_CALL(exec2, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Clock::pause(); // Kill the first executor. containerizer.destroy(frameworkId.get(), exec1.id); AWAIT_READY(scheduleExecutorRunWork); AWAIT_READY(scheduleExecutorWork); AWAIT_READY(scheduleFrameworkWork); // Speedup the allocator. 
  while (unscheduleFrameworkWork.isPending()) {
    Clock::advance(Seconds(1));
    Clock::settle();
  }

  AWAIT_READY(unscheduleFrameworkWork);

  Clock::resume();

  EXPECT_CALL(exec2, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
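// The NOTE above relies on a gMock rule that is easy to miss: when a mock
// method is called, gMock searches expectations in the reverse order of
// their declaration and uses the newest one that still matches. Assuming the
// FUTURE_DISPATCH helpers install one-shot expectations (as the NOTE
// implies), that is why they are declared in the reverse of the actual call
// order. The following standalone sketch (not part of the Mesos test suite;
// the mock class and test name are made up for illustration) demonstrates
// the rule.
#include <string>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

class MockCollector
{
public:
  MOCK_METHOD1(schedule, std::string(const std::string&));
};


TEST(GMockOrderingExample, NewestMatchingExpectationWinsFirst)
{
  using ::testing::_;
  using ::testing::Return;

  MockCollector collector;

  // Declared first, but consumed by the *second* call.
  EXPECT_CALL(collector, schedule(_))
    .WillOnce(Return("declared first"))
    .RetiresOnSaturation();

  // Declared last, so it is consumed by the *first* call.
  EXPECT_CALL(collector, schedule(_))
    .WillOnce(Return("declared last"))
    .RetiresOnSaturation();

  EXPECT_EQ("declared last", collector.schedule("call 1"));
  EXPECT_EQ("declared first", collector.schedule("call 2"));
}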
// Tests whether a slave correctly detects the new master when its // ZooKeeper session is expired and a new master is elected before the // slave reconnects with ZooKeeper. TEST_F(ZooKeeperMasterContenderDetectorTest, MasterDetectorExpireSlaveZKSessionNewMaster) { Try<zookeeper::URL> url = zookeeper::URL::parse( "zk://" + server->connectString() + "/mesos"); ASSERT_SOME(url); // Simulate a leading master. Owned<zookeeper::Group> leaderGroup( new Group(url.get(), MASTER_CONTENDER_ZK_SESSION_TIMEOUT)); // 1. Simulate a leading contender. ZooKeeperMasterContender leaderContender(leaderGroup); ZooKeeperMasterDetector leaderDetector(leaderGroup); PID<Master> pid; pid.node.ip = 10000000; pid.node.port = 10000; MasterInfo leader = internal::protobuf::createMasterInfo(pid); leaderContender.initialize(leader); Future<Future<Nothing> > contended = leaderContender.contend(); AWAIT_READY(contended); Future<Option<MasterInfo> > detected = leaderDetector.detect(None()); AWAIT_READY(detected); EXPECT_SOME_EQ(leader, detected.get()); // 2. Simulate a non-leading contender. Owned<zookeeper::Group> followerGroup( new Group(url.get(), MASTER_CONTENDER_ZK_SESSION_TIMEOUT)); ZooKeeperMasterContender followerContender(followerGroup); ZooKeeperMasterDetector followerDetector(followerGroup); PID<Master> pid2; pid2.node.ip = 10000001; pid2.node.port = 10001; MasterInfo follower = internal::protobuf::createMasterInfo(pid2); followerContender.initialize(follower); contended = followerContender.contend(); AWAIT_READY(contended); detected = followerDetector.detect(None()); EXPECT_SOME_EQ(leader, detected.get()); // 3. Simulate a non-contender. Owned<zookeeper::Group> nonContenderGroup( new Group(url.get(), MASTER_DETECTOR_ZK_SESSION_TIMEOUT)); ZooKeeperMasterDetector nonContenderDetector(nonContenderGroup); detected = nonContenderDetector.detect(); EXPECT_SOME_EQ(leader, detected.get()); detected = nonContenderDetector.detect(leader); // Now expire the slave's and leading master's zk sessions. // NOTE: Here we assume that slave stays disconnected from the ZK // when the leading master loses its session. Future<Option<int64_t> > slaveSession = nonContenderGroup->session(); AWAIT_READY(slaveSession); Future<Option<int64_t> > masterSession = leaderGroup->session(); AWAIT_READY(masterSession); server->expireSession(slaveSession.get().get()); server->expireSession(masterSession.get().get()); // Wait for session expiration and the detector will first receive // a "no master detected" event. AWAIT_READY(detected); EXPECT_NONE(detected.get()); // nonContenderDetector can now re-detect the new master. detected = nonContenderDetector.detect(detected.get()); AWAIT_READY(detected); EXPECT_SOME_EQ(follower, detected.get()); }
// This test does not set any Accept header for the subscribe call. // The default response media type should be "application/json" in // this case. TEST_P(ExecutorHttpApiTest, NoAcceptHeader) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); ExecutorID executorId = DEFAULT_EXECUTOR_ID; MockExecutor exec(executorId); Try<PID<Slave>> slave = StartSlave(&exec); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)); Future<Nothing> statusUpdate; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureSatisfy(&statusUpdate)); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_EQ(1u, offers.get().size()); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); TaskInfo taskInfo = createTask(offers.get()[0], "", executorId); driver.launchTasks(offers.get()[0].id(), {taskInfo}); // Wait until status update is received on the scheduler before sending // an executor subscribe request. AWAIT_READY(statusUpdate); // Only subscribe needs to 'Accept' JSON or protobuf. Call call; call.mutable_framework_id()->CopyFrom(evolve(frameworkId.get())); call.mutable_executor_id()->CopyFrom(evolve(executorId)); call.set_type(Call::SUBSCRIBE); call.mutable_subscribe(); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); // No 'Accept' header leads to all media types considered // acceptable. JSON will be chosen by default. process::http::Headers headers; Future<Response> response = process::http::streaming::post( slave.get(), "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); EXPECT_SOME_EQ(APPLICATION_JSON, response.get().headers.get("Content-Type")); Shutdown(); }
void PosixDiskIsolatorProcess::_collect( const ContainerID& containerId, const string& path, const Future<Bytes>& future) { if (future.isDiscarded()) { LOG(INFO) << "Checking disk usage at '" << path << "' for container " << containerId << " has been cancelled"; } else if (future.isFailed()) { LOG(ERROR) << "Checking disk usage at '" << path << "' for container " << containerId << " has failed: " << future.failure(); } if (!infos.contains(containerId)) { // The container might have just been destroyed. return; } const Owned<Info>& info = infos[containerId]; if (!info->paths.contains(path)) { // The path might have just been removed from this container's // resources. return; } // Check if the disk usage exceeds the quota. If yes, report the // limitation. We keep collecting the disk usage for 'path' by // initiating another round of disk usage check. The check will be // throttled by DiskUsageCollector. if (future.isReady()) { // Save the last disk usage. info->paths[path].lastUsage = future.get(); // We need to ignore the quota enforcement check for MOUNT type // disk resources because its quota will be enforced by the // underlying filesystem. bool isDiskSourceMount = false; foreach (const Resource& resource, info->paths[path].quota) { if (resource.has_disk() && resource.disk().has_source() && resource.disk().source().type() == Resource::DiskInfo::Source::MOUNT) { isDiskSourceMount = true; } } if (flags.enforce_container_disk_quota && !isDiskSourceMount) { Option<Bytes> quota = info->paths[path].quota.disk(); CHECK_SOME(quota); if (future.get() > quota.get()) { info->limitation.set( protobuf::slave::createContainerLimitation( Resources(info->paths[path].quota), "Disk usage (" + stringify(future.get()) + ") exceeds quota (" + stringify(quota.get()) + ")", TaskStatus::REASON_CONTAINER_LIMITATION_DISK)); } } }
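// NOTE: The excerpt above is truncated before _collect() re-arms the next
// measurement. Its own comment states that collection continues with another
// round throttled by the DiskUsageCollector. A minimal sketch of that re-arm
// step in libprocess style is shown below; 'collector' and its 'usage(path)'
// method returning Future<Bytes> are assumed members and are illustrative
// only, not taken from the excerpt.
//
//   collector.usage(path)
//     .onAny(defer(
//         PID<PosixDiskIsolatorProcess>(this),
//         &PosixDiskIsolatorProcess::_collect,
//         containerId,
//         path,
//         lambda::_1));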
// Tests that detectors do not fail when we reach our ZooKeeper // session timeout. TEST_F(ZooKeeperMasterContenderDetectorTest, MasterDetectorTimedoutSession) { // Use an arbitrary timeout value. Duration sessionTimeout(Seconds(10)); Try<zookeeper::URL> url = zookeeper::URL::parse( "zk://" + server->connectString() + "/mesos"); ASSERT_SOME(url); Owned<zookeeper::Group> leaderGroup(new Group(url.get(), sessionTimeout)); // First we bring up three master contender/detector: // 1. A leading contender. // 2. A non-leading contender. // 3. A non-contender (detector). // 1. Simulate a leading contender. ZooKeeperMasterContender leaderContender(leaderGroup); PID<Master> pid; pid.node.ip = 10000000; pid.node.port = 10000; MasterInfo leader = internal::protobuf::createMasterInfo(pid); leaderContender.initialize(leader); Future<Future<Nothing> > contended = leaderContender.contend(); AWAIT_READY(contended); Future<Nothing> leaderLostCandidacy = contended.get(); ZooKeeperMasterDetector leaderDetector(leaderGroup); Future<Option<MasterInfo> > detected = leaderDetector.detect(); AWAIT_READY(detected); EXPECT_SOME_EQ(leader, detected.get()); // 2. Simulate a non-leading contender. Owned<zookeeper::Group> followerGroup(new Group(url.get(), sessionTimeout)); ZooKeeperMasterContender followerContender(followerGroup); PID<Master> pid2; pid2.node.ip = 10000001; pid2.node.port = 10001; MasterInfo follower = internal::protobuf::createMasterInfo(pid2); followerContender.initialize(follower); contended = followerContender.contend(); AWAIT_READY(contended); Future<Nothing> followerLostCandidacy = contended.get(); ZooKeeperMasterDetector followerDetector(followerGroup); detected = followerDetector.detect(); AWAIT_READY(detected); EXPECT_SOME_EQ(leader, detected.get()); // 3. Simulate a non-contender. Owned<zookeeper::Group> nonContenderGroup( new Group(url.get(), sessionTimeout)); ZooKeeperMasterDetector nonContenderDetector(nonContenderGroup); detected = nonContenderDetector.detect(); EXPECT_SOME_EQ(leader, detected.get()); // Expecting the reconnecting event after we shut down the ZK. Future<Nothing> leaderReconnecting = FUTURE_DISPATCH( leaderGroup->process->self(), &GroupProcess::reconnecting); Future<Nothing> followerReconnecting = FUTURE_DISPATCH( followerGroup->process->self(), &GroupProcess::reconnecting); Future<Nothing> nonContenderReconnecting = FUTURE_DISPATCH( nonContenderGroup->process->self(), &GroupProcess::reconnecting); server->shutdownNetwork(); AWAIT_READY(leaderReconnecting); AWAIT_READY(followerReconnecting); AWAIT_READY(nonContenderReconnecting); // Now the detectors re-detect. Future<Option<MasterInfo> > leaderDetected = leaderDetector.detect(leader); Future<Option<MasterInfo> > followerDetected = followerDetector.detect(leader); Future<Option<MasterInfo> > nonContenderDetected = nonContenderDetector.detect(leader); Clock::pause(); // We may need to advance multiple times because we could have // advanced the clock before the timer in Group starts. while (leaderDetected.isPending() || followerDetected.isPending() || nonContenderDetected.isPending() || leaderLostCandidacy.isPending() || followerLostCandidacy.isPending()) { Clock::advance(sessionTimeout); Clock::settle(); } EXPECT_NONE(leaderDetected.get()); EXPECT_NONE(followerDetected.get()); EXPECT_NONE(nonContenderDetected.get()); EXPECT_TRUE(leaderLostCandidacy.isReady()); EXPECT_TRUE(followerLostCandidacy.isReady()); Clock::resume(); }
TEST_F(ExceptionTest, DisallowSchedulerCallbacksOnAbort)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  Future<process::Message> message =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // None of these callbacks should be invoked.
  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(0);

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  EXPECT_CALL(sched, frameworkMessage(&driver, _, _, _))
    .Times(0);

  EXPECT_CALL(sched, slaveLost(&driver, _))
    .Times(0);

  EXPECT_CALL(sched, error(&driver, _))
    .Times(0);

  ASSERT_EQ(DRIVER_ABORTED, driver.abort());

  Future<RescindResourceOfferMessage> rescindMsg =
    FUTURE_PROTOBUF(RescindResourceOfferMessage(), _, _);

  // Simulate a message from master to the scheduler.
  RescindResourceOfferMessage rescindMessage;
  rescindMessage.mutable_offer_id()->MergeFrom(offers.get()[0].id());

  process::post(message.get().to, rescindMessage);

  AWAIT_READY(rescindMsg);

  Future<UnregisterFrameworkMessage> unregisterMsg =
    FUTURE_PROTOBUF(UnregisterFrameworkMessage(), _, _);

  driver.stop(); // Ensures reception of RescindResourceOfferMessage.

  AWAIT_READY(unregisterMsg);

  Shutdown();
}
// This test verifies that the provisioner can provision an rootfs // from an image that is already put into the store directory. TEST_F(ProvisionerAppcTest, ROOT_Provision) { // Create provisioner. slave::Flags flags; flags.image_providers = "APPC"; flags.appc_store_dir = path::join(os::getcwd(), "store"); flags.image_provisioner_backend = "bind"; flags.work_dir = "work_dir"; Fetcher fetcher; Try<Owned<Provisioner>> provisioner = Provisioner::create(flags, &fetcher); ASSERT_SOME(provisioner); // Create a simple image in the store: // <store> // |--images // |--<id> // |--manifest // |--rootfs/tmp/test JSON::Value manifest = JSON::parse( "{" " \"acKind\": \"ImageManifest\"," " \"acVersion\": \"0.6.1\"," " \"name\": \"foo.com/bar\"," " \"labels\": [" " {" " \"name\": \"version\"," " \"value\": \"1.0.0\"" " }," " {" " \"name\": \"arch\"," " \"value\": \"amd64\"" " }," " {" " \"name\": \"os\"," " \"value\": \"linux\"" " }" " ]," " \"annotations\": [" " {" " \"name\": \"created\"," " \"value\": \"1438983392\"" " }" " ]" "}").get(); // The 'imageId' below has the correct format but it's not computed // by hashing the tarball of the image. It's OK here as we assume // the images under 'images' have passed such check when they are // downloaded and validated. string imageId = "sha512-e77d96aa0240eedf134b8c90baeaf76dca8e78691836301d7498c84020446042e" "797b296d6ab296e0954c2626bfb264322ebeb8f447dac4fac6511ea06bc61f0"; string imagePath = path::join(flags.appc_store_dir, "images", imageId); ASSERT_SOME(os::mkdir(path::join(imagePath, "rootfs", "tmp"))); ASSERT_SOME( os::write(path::join(imagePath, "rootfs", "tmp", "test"), "test")); ASSERT_SOME( os::write(path::join(imagePath, "manifest"), stringify(manifest))); // Recover. This is when the image in the store is loaded. AWAIT_READY(provisioner.get()->recover({}, {})); // Simulate a task that requires an image. Image image; image.mutable_appc()->set_name("foo.com/bar"); ContainerID containerId; containerId.set_value("12345"); Future<string> rootfs = provisioner.get()->provision(containerId, image); AWAIT_READY(rootfs); string provisionerDir = slave::paths::getProvisionerDir(flags.work_dir); string containerDir = slave::provisioner::paths::getContainerDir( provisionerDir, containerId); Try<hashmap<string, hashset<string>>> rootfses = slave::provisioner::paths::listContainerRootfses( provisionerDir, containerId); ASSERT_SOME(rootfses); // Verify that the rootfs is successfully provisioned. ASSERT_TRUE(rootfses->contains(flags.image_provisioner_backend)); ASSERT_EQ(1u, rootfses->get(flags.image_provisioner_backend)->size()); EXPECT_EQ(*rootfses->get(flags.image_provisioner_backend)->begin(), Path(rootfs.get()).basename()); Future<bool> destroy = provisioner.get()->destroy(containerId); AWAIT_READY(destroy); // One rootfs is destroyed. EXPECT_TRUE(destroy.get()); // The container directory is successfully cleaned up. EXPECT_FALSE(os::exists(containerDir)); }
// This test verifies that the provisioner can provision an rootfs // from an image that is already put into the store directory. TEST_F(ProvisionerAppcTest, ROOT_Provision) { // Create provisioner. slave::Flags flags; flags.image_providers = "APPC"; flags.appc_store_dir = path::join(os::getcwd(), "store"); flags.image_provisioner_backend = "bind"; flags.work_dir = "work_dir"; Try<Owned<Provisioner>> provisioner = Provisioner::create(flags); ASSERT_SOME(provisioner); Try<string> createImage = createTestImage( flags.appc_store_dir, getManifest()); ASSERT_SOME(createImage); // Recover. This is when the image in the store is loaded. AWAIT_READY(provisioner.get()->recover({}, {})); // Simulate a task that requires an image. Image image; image.mutable_appc()->CopyFrom(getTestImage()); ContainerID containerId; containerId.set_value("12345"); Future<slave::ProvisionInfo> provisionInfo = provisioner.get()->provision(containerId, image); AWAIT_READY(provisionInfo); string provisionerDir = slave::paths::getProvisionerDir(flags.work_dir); string containerDir = slave::provisioner::paths::getContainerDir( provisionerDir, containerId); Try<hashmap<string, hashset<string>>> rootfses = slave::provisioner::paths::listContainerRootfses( provisionerDir, containerId); ASSERT_SOME(rootfses); // Verify that the rootfs is successfully provisioned. ASSERT_TRUE(rootfses->contains(flags.image_provisioner_backend)); ASSERT_EQ(1u, rootfses->get(flags.image_provisioner_backend)->size()); EXPECT_EQ(*rootfses->get(flags.image_provisioner_backend)->begin(), Path(provisionInfo.get().rootfs).basename()); Future<bool> destroy = provisioner.get()->destroy(containerId); AWAIT_READY(destroy); // One rootfs is destroyed. EXPECT_TRUE(destroy.get()); // The container directory is successfully cleaned up. EXPECT_FALSE(os::exists(containerDir)); }
// This test verifies that a provisioner can recover the rootfs // provisioned by a previous provisioner and then destroy it. Note // that we use the copy backend in this test so Linux is not required. TEST_F(ProvisionerAppcTest, Recover) { // Create provisioner. slave::Flags flags; flags.image_providers = "APPC"; flags.appc_store_dir = path::join(os::getcwd(), "store"); flags.image_provisioner_backend = "copy"; flags.work_dir = "work_dir"; Try<Owned<Provisioner>> provisioner1 = Provisioner::create(flags); ASSERT_SOME(provisioner1); Try<string> createImage = createTestImage( flags.appc_store_dir, getManifest()); ASSERT_SOME(createImage); // Recover. This is when the image in the store is loaded. AWAIT_READY(provisioner1.get()->recover({}, {})); Image image; image.mutable_appc()->CopyFrom(getTestImage()); ContainerID containerId; containerId.set_value(UUID::random().toString()); Future<slave::ProvisionInfo> provisionInfo = provisioner1.get()->provision(containerId, image); AWAIT_READY(provisionInfo); // Create a new provisioner to recover the state from the container. Try<Owned<Provisioner>> provisioner2 = Provisioner::create(flags); ASSERT_SOME(provisioner2); mesos::slave::ContainerState state; // Here we are using an ExecutorInfo in the ContainerState without a // ContainerInfo. This is the situation where the Image is specified // via --default_container_info so it's not part of the recovered // ExecutorInfo. state.mutable_container_id()->CopyFrom(containerId); AWAIT_READY(provisioner2.get()->recover({state}, {})); // It's possible for the user to provision two different rootfses // from the same image. AWAIT_READY(provisioner2.get()->provision(containerId, image)); string provisionerDir = slave::paths::getProvisionerDir(flags.work_dir); string containerDir = slave::provisioner::paths::getContainerDir( provisionerDir, containerId); Try<hashmap<string, hashset<string>>> rootfses = slave::provisioner::paths::listContainerRootfses( provisionerDir, containerId); ASSERT_SOME(rootfses); // Verify that the rootfs is successfully provisioned. ASSERT_TRUE(rootfses->contains(flags.image_provisioner_backend)); EXPECT_EQ(2u, rootfses->get(flags.image_provisioner_backend)->size()); Future<bool> destroy = provisioner2.get()->destroy(containerId); AWAIT_READY(destroy); EXPECT_TRUE(destroy.get()); // The container directory is successfully cleaned up. EXPECT_FALSE(os::exists(containerDir)); }
TEST(MonitorTest, Statistics) { FrameworkID frameworkId; frameworkId.set_value("framework"); ExecutorID executorId; executorId.set_value("executor"); ExecutorInfo executorInfo; executorInfo.mutable_executor_id()->CopyFrom(executorId); executorInfo.mutable_framework_id()->CopyFrom(frameworkId); executorInfo.set_name("name"); executorInfo.set_source("source"); ResourceStatistics statistics; statistics.set_cpus_nr_periods(100); statistics.set_cpus_nr_throttled(2); statistics.set_cpus_user_time_secs(4); statistics.set_cpus_system_time_secs(1); statistics.set_cpus_throttled_time_secs(0.5); statistics.set_cpus_limit(1.0); statistics.set_mem_file_bytes(0); statistics.set_mem_anon_bytes(0); statistics.set_mem_mapped_file_bytes(0); statistics.set_mem_rss_bytes(1024); statistics.set_mem_limit_bytes(2048); statistics.set_timestamp(0); ResourceMonitor monitor([=]() -> Future<ResourceUsage> { Resources resources = Resources::parse("cpus:1;mem:2").get(); ResourceUsage usage; ResourceUsage::Executor* executor = usage.add_executors(); executor->mutable_executor_info()->CopyFrom(executorInfo); executor->mutable_allocated()->CopyFrom(resources); executor->mutable_statistics()->CopyFrom(statistics); return usage; }); UPID upid("monitor", process::address()); Future<http::Response> response = http::get(upid, "statistics"); AWAIT_READY(response); AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::OK().status, response); AWAIT_EXPECT_RESPONSE_HEADER_EQ( "application/json", "Content-Type", response); JSON::Array expected; JSON::Object usage; usage.values["executor_id"] = "executor"; usage.values["executor_name"] = "name"; usage.values["framework_id"] = "framework"; usage.values["source"] = "source"; usage.values["statistics"] = JSON::Protobuf(statistics); expected.values.push_back(usage); Try<JSON::Array> result = JSON::parse<JSON::Array>(response.get().body); ASSERT_SOME(result); ASSERT_EQ(expected, result.get()); }
// Abstracts the manifest accessor for the test fixture. This provides the // ability for customizing manifests for fixtures. virtual JSON::Value getManifest() const { return JSON::parse( R"~( { "acKind": "ImageManifest", "acVersion": "0.6.1", "name": "foo.com/bar", "labels": [ { "name": "version", "value": "1.0.0" }, { "name": "arch", "value": "amd64" }, { "name": "os", "value": "linux" } ], "annotations": [ { "name": "created", "value": "1438983392" } ] })~").get(); } }; TEST_F(AppcStoreTest, Recover) { // Create store. slave::Flags flags; flags.appc_store_dir = path::join(os::getcwd(), "store"); Try<Owned<slave::Store>> store = Store::create(flags); ASSERT_SOME(store); Try<string> createImage = createTestImage( flags.appc_store_dir, getManifest()); ASSERT_SOME(createImage); const string imagePath = createImage.get(); // Recover the image from disk. AWAIT_READY(store.get()->recover()); Image image; image.mutable_appc()->CopyFrom(getTestImage()); Future<slave::ImageInfo> ImageInfo = store.get()->get(image); AWAIT_READY(ImageInfo); EXPECT_EQ(1u, ImageInfo.get().layers.size()); ASSERT_SOME(os::realpath(imagePath)); EXPECT_EQ( os::realpath(path::join(imagePath, "rootfs")).get(), ImageInfo.get().layers.front()); }
// This is an end-to-end test that verifies that the slave returns the
// correct ResourceUsage based on the currently running executors, and
// that the values retrieved from the statistics endpoint are as expected.
TEST_F(MonitorIntegrationTest, RunningExecutor)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_FALSE(offers.get().empty());

  const Offer& offer = offers.get()[0];

  // Launch a task and wait until it is in RUNNING status.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Hit the statistics endpoint and expect that the response contains the
  // resource statistics for the running container.
  UPID upid("monitor", process::address());

  Future<http::Response> response = http::get(upid, "statistics");
  AWAIT_READY(response);

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json",
      "Content-Type",
      response);

  // Verify that the statistics in the response contain the proper
  // resource limits for the container.
  Try<JSON::Value> value = JSON::parse(response.get().body);
  ASSERT_SOME(value);

  Try<JSON::Value> expected = JSON::parse(strings::format(
      "[{"
          "\"statistics\":{"
              "\"cpus_limit\":%g,"
              "\"mem_limit_bytes\":%lu"
          "}"
      "}]",
      1 + slave::DEFAULT_EXECUTOR_CPUS,
      (Megabytes(32) + slave::DEFAULT_EXECUTOR_MEM).bytes()).get());

  ASSERT_SOME(expected);
  EXPECT_TRUE(value.get().contains(expected.get()));

  driver.stop();
  driver.join();

  Shutdown();
}
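// For concreteness: assuming the slave's default executor overhead of
// 0.1 CPUs and 32MB of memory (these defaults are an assumption here, not
// stated in this excerpt), the expected limits above evaluate to:
//
//   cpus_limit      = 1 + 0.1            = 1.1
//   mem_limit_bytes = 32MB + 32MB = 64MB = 67108864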
TEST(MetricsTest, SnapshotTimeout) { ASSERT_TRUE(GTEST_IS_THREADSAFE); UPID upid("metrics", process::address()); Clock::pause(); // Advance the clock to avoid rate limit. Clock::advance(Seconds(1)); // Ensure the timeout parameter is validated. AWAIT_EXPECT_RESPONSE_STATUS_EQ( BadRequest().status, http::get(upid, "snapshot", "timeout=foobar")); // Advance the clock to avoid rate limit. Clock::advance(Seconds(1)); // Add gauges and a counter. GaugeProcess process; PID<GaugeProcess> pid = spawn(&process); ASSERT_TRUE(pid); Gauge gauge("test/gauge", defer(pid, &GaugeProcess::get)); Gauge gaugeFail("test/gauge_fail", defer(pid, &GaugeProcess::fail)); Gauge gaugeTimeout("test/gauge_timeout", defer(pid, &GaugeProcess::pending)); Counter counter("test/counter"); AWAIT_READY(metrics::add(gauge)); AWAIT_READY(metrics::add(gaugeFail)); AWAIT_READY(metrics::add(gaugeTimeout)); AWAIT_READY(metrics::add(counter)); // Advance the clock to avoid rate limit. Clock::advance(Seconds(1)); // Get the snapshot. Future<Response> response = http::get(upid, "snapshot", "timeout=2secs"); // Make sure the request is pending before the timeout is exceeded. Clock::settle(); ASSERT_TRUE(response.isPending()); // Advance the clock to trigger the timeout. Clock::advance(Seconds(2)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); // Parse the response. Try<JSON::Object> responseJSON = JSON::parse<JSON::Object>(response.get().body); ASSERT_SOME(responseJSON); // We can't use simple JSON equality testing here as initializing // libprocess adds metrics to the system. We want to only check if // the metrics from this test are correctly handled. map<string, JSON::Value> values = responseJSON.get().values; EXPECT_EQ(1u, values.count("test/counter")); EXPECT_FLOAT_EQ(0.0, values["test/counter"].as<JSON::Number>().value); EXPECT_EQ(1u, values.count("test/gauge")); EXPECT_FLOAT_EQ(42.0, values["test/gauge"].as<JSON::Number>().value); EXPECT_EQ(0u, values.count("test/gauge_fail")); EXPECT_EQ(0u, values.count("test/gauge_timeout")); // Remove the metrics and ensure they are no longer in the snapshot. AWAIT_READY(metrics::remove(gauge)); AWAIT_READY(metrics::remove(gaugeFail)); AWAIT_READY(metrics::remove(gaugeTimeout)); AWAIT_READY(metrics::remove(counter)); // Advance the clock to avoid rate limit. Clock::advance(Seconds(1)); // Ensure MetricsProcess has removed the metrics. Clock::settle(); response = http::get(upid, "snapshot", "timeout=2secs"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); // Parse the response. responseJSON = JSON::parse<JSON::Object>(response.get().body); ASSERT_SOME(responseJSON); values = responseJSON.get().values; ASSERT_SOME(responseJSON); EXPECT_EQ(0u, values.count("test/counter")); EXPECT_EQ(0u, values.count("test/gauge")); EXPECT_EQ(0u, values.count("test/gauge_fail")); EXPECT_EQ(0u, values.count("test/gauge_timeout")); terminate(process); wait(process); }
TEST(MetricsTest, Snapshot) { ASSERT_TRUE(GTEST_IS_THREADSAFE); UPID upid("metrics", process::address()); Clock::pause(); // Add a gauge and a counter. GaugeProcess process; PID<GaugeProcess> pid = spawn(&process); ASSERT_TRUE(pid); Gauge gauge("test/gauge", defer(pid, &GaugeProcess::get)); Gauge gaugeFail("test/gauge_fail", defer(pid, &GaugeProcess::fail)); Counter counter("test/counter"); AWAIT_READY(metrics::add(gauge)); AWAIT_READY(metrics::add(gaugeFail)); AWAIT_READY(metrics::add(counter)); // Advance the clock to avoid rate limit. Clock::advance(Seconds(1)); // Get the snapshot. Future<Response> response = http::get(upid, "snapshot"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); // Parse the response. Try<JSON::Object> responseJSON = JSON::parse<JSON::Object>(response.get().body); ASSERT_SOME(responseJSON); map<string, JSON::Value> values = responseJSON.get().values; EXPECT_EQ(1u, values.count("test/counter")); EXPECT_FLOAT_EQ(0.0, values["test/counter"].as<JSON::Number>().value); EXPECT_EQ(1u, values.count("test/gauge")); EXPECT_FLOAT_EQ(42.0, values["test/gauge"].as<JSON::Number>().value); EXPECT_EQ(0u, values.count("test/gauge_fail")); // Remove the metrics and ensure they are no longer in the snapshot. AWAIT_READY(metrics::remove(gauge)); AWAIT_READY(metrics::remove(gaugeFail)); AWAIT_READY(metrics::remove(counter)); // Advance the clock to avoid rate limit. Clock::advance(Seconds(1)); // Ensure MetricsProcess has removed the metrics. Clock::settle(); response = http::get(upid, "snapshot"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); // Parse the response. responseJSON = JSON::parse<JSON::Object>(response.get().body); ASSERT_SOME(responseJSON); values = responseJSON.get().values; EXPECT_EQ(0u, values.count("test/counter")); EXPECT_EQ(0u, values.count("test/gauge")); EXPECT_EQ(0u, values.count("test/gauge_fail")); terminate(process); wait(process); }
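// The two metrics tests above rely on a GaugeProcess helper that is not
// shown in this excerpt. A minimal sketch of what such a helper might look
// like follows, assuming the Gauge callback contract of returning a
// Future<double>: one method yields a value, one fails, and one stays
// pending forever (to exercise the snapshot timeout).
#include <process/future.hpp>
#include <process/process.hpp>

class GaugeProcess : public process::Process<GaugeProcess>
{
public:
  // Satisfied immediately; matches the 42.0 the tests expect.
  process::Future<double> get()
  {
    return 42.0;
  }

  // A failed future; the metric is omitted from the snapshot.
  process::Future<double> fail()
  {
    return process::Failure("failure");
  }

  // Never satisfied; used to trigger the snapshot timeout path.
  process::Future<double> pending()
  {
    return process::Future<double>();
  }
};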
TEST_F(LogStateTest, Diff)
{
  Future<Variable<Slaves>> future1 = state->fetch<Slaves>("slaves");
  AWAIT_READY(future1);

  Variable<Slaves> variable = future1.get();

  Slaves slaves = variable.get();
  ASSERT_EQ(0, slaves.slaves().size());

  for (size_t i = 0; i < 1024; i++) {
    Slave* slave = slaves.add_slaves();
    slave->mutable_info()->set_hostname("localhost" + stringify(i));
  }

  variable = variable.mutate(slaves);

  Future<Option<Variable<Slaves>>> future2 = state->store(variable);
  AWAIT_READY(future2);
  ASSERT_SOME(future2.get());

  variable = future2.get().get();

  Slave* slave = slaves.add_slaves();
  slave->mutable_info()->set_hostname("localhost1024");

  variable = variable.mutate(slaves);

  future2 = state->store(variable);
  AWAIT_READY(future2);
  ASSERT_SOME(future2.get());

  // It's possible that truncation is happening asynchronously, which would
  // cause the test to fail: we would end up getting a pending position from
  // Log::Reader::ending, which in turn causes Log::Reader::read to fail. To
  // remedy this, we pause the clock and wait for all executing processes to
  // settle.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  Log::Reader reader(log);

  Future<Log::Position> beginning = reader.beginning();
  Future<Log::Position> ending = reader.ending();

  AWAIT_READY(beginning);
  AWAIT_READY(ending);

  Future<list<Log::Entry>> entries = reader.read(beginning.get(), ending.get());
  AWAIT_READY(entries);

  // Convert each Log::Entry to an Operation.
  vector<Operation> operations;

  foreach (const Log::Entry& entry, entries.get()) {
    // Parse the Operation from the Log::Entry.
    Operation operation;

    google::protobuf::io::ArrayInputStream stream(
        entry.data.data(),
        entry.data.size());

    ASSERT_TRUE(operation.ParseFromZeroCopyStream(&stream));

    operations.push_back(operation);
  }

  ASSERT_EQ(2u, operations.size());
  EXPECT_EQ(Operation::SNAPSHOT, operations[0].type());
  EXPECT_EQ(Operation::DIFF, operations[1].type());
}
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to send status updates, we send // a ShutdownMessage to the slave. Why? Because during a network // partition, the master will remove a partitioned slave, thus sending // its tasks to LOST. At this point, when the partition is removed, // the slave may attempt to send updates if it was unaware that the // master removed it. We've already notified frameworks that these // tasks were LOST, so we have to have the slave shut down. TEST_F(PartitionTest, PartitionedSlaveStatusUpdates) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); DROP_PROTOBUFS(PongSlaveMessage(), _, _); Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); SlaveID slaveId = slaveRegisteredMessage.get().slave_id(); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(frameworkId); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave sends an update. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); EXPECT_CALL(sched, offerRescinded(&driver, _)) .WillRepeatedly(Return()); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); // At this point, the slave still thinks it's registered, so we // simulate a status update coming from the slave. TaskID taskId; taskId.set_value("task_id"); const StatusUpdate& update = protobuf::createStatusUpdate( frameworkId.get(), slaveId, taskId, TASK_RUNNING, TaskStatus::SOURCE_SLAVE, UUID::random()); StatusUpdateMessage message; message.mutable_update()->CopyFrom(update); message.set_pid(stringify(slave.get()->pid)); process::post(master.get()->pid, message); // The master should shutdown the slave upon receiving the update. 
AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); }
Future<Nothing> CopyFetcherPlugin::fetch( const URI& uri, const string& directory) const { // TODO(jojy): Validate the given URI. if (!uri.has_path()) { return Failure("URI path is not specified"); } // TODO(jojy): Verify that the path is a file. Try<Nothing> mkdir = os::mkdir(directory); if (mkdir.isError()) { return Failure( "Failed to create directory '" + directory + "': " + mkdir.error()); } VLOG(1) << "Copying '" << uri.path() << "' to '" << directory << "'"; const vector<string> argv = {"cp", "-a", uri.path(), directory}; Try<Subprocess> s = subprocess( "cp", argv, Subprocess::PATH(os::DEV_NULL), Subprocess::PIPE(), Subprocess::PIPE()); if (s.isError()) { return Failure("Failed to exec the copy subprocess: " + s.error()); } return await( s.get().status(), io::read(s.get().out().get()), io::read(s.get().err().get())) .then([](const tuple< Future<Option<int>>, Future<string>, Future<string>>& t) -> Future<Nothing> { Future<Option<int>> status = std::get<0>(t); if (!status.isReady()) { return Failure( "Failed to get the exit status of the copy subprocess: " + (status.isFailed() ? status.failure() : "discarded")); } if (status->isNone()) { return Failure("Failed to reap the copy subprocess"); } if (status->get() != 0) { Future<string> error = std::get<2>(t); if (!error.isReady()) { return Failure( "Failed to perform 'copy'. Reading stderr failed: " + (error.isFailed() ? error.failure() : "discarded")); } return Failure("Failed to perform 'copy': " + error.get()); } return Nothing(); }); }
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus sending its tasks to LOST. At this point,
// when the partition is removed, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to have the
// slave shut down.
TEST_F(PartitionTest, PartitionedSlaveReregistration)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  StandaloneMasterDetector detector(master.get());

  Try<PID<Slave> > slave = StartSlave(&exec, &detector);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
uint32_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == master::MAX_SLAVE_PING_TIMEOUTS) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); } Clock::advance(master::SLAVE_PING_TIMEOUT); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); Clock::resume(); // We now complete the partition on the slave side as well. This // is done by simulating a master loss event which would normally // occur during a network partition. detector.appoint(None()); Future<Nothing> shutdown; EXPECT_CALL(exec, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Have the slave re-register with the master. detector.appoint(master.get()); // Upon re-registration, the master will shutdown the slave. // The slave will then shut down the executor. AWAIT_READY(shutdownMessage); AWAIT_READY(shutdown); driver.stop(); driver.join(); Shutdown(); }
TEST_F(AppcStoreTest, Recover) { // Create store. slave::Flags flags; flags.appc_store_dir = path::join(os::getcwd(), "store"); Try<Owned<slave::Store>> store = Store::create(flags); ASSERT_SOME(store); // Create a simple image in the store: // <store> // |--images // |--<id> // |--manifest // |--rootfs/tmp/test JSON::Value manifest = JSON::parse( "{" " \"acKind\": \"ImageManifest\"," " \"acVersion\": \"0.6.1\"," " \"name\": \"foo.com/bar\"," " \"labels\": [" " {" " \"name\": \"version\"," " \"value\": \"1.0.0\"" " }," " {" " \"name\": \"arch\"," " \"value\": \"amd64\"" " }," " {" " \"name\": \"os\"," " \"value\": \"linux\"" " }" " ]," " \"annotations\": [" " {" " \"name\": \"created\"," " \"value\": \"1438983392\"" " }" " ]" "}").get(); // The 'imageId' below has the correct format but it's not computed // by hashing the tarball of the image. It's OK here as we assume // the images under 'images' have passed such check when they are // downloaded and validated. string imageId = "sha512-e77d96aa0240eedf134b8c90baeaf76dca8e78691836301d7498c84020446042e" "797b296d6ab296e0954c2626bfb264322ebeb8f447dac4fac6511ea06bc61f0"; string imagePath = path::join(flags.appc_store_dir, "images", imageId); ASSERT_SOME(os::mkdir(path::join(imagePath, "rootfs", "tmp"))); ASSERT_SOME( os::write(path::join(imagePath, "rootfs", "tmp", "test"), "test")); ASSERT_SOME( os::write(path::join(imagePath, "manifest"), stringify(manifest))); // Recover the image from disk. AWAIT_READY(store.get()->recover()); Image image; image.mutable_appc()->set_name("foo.com/bar"); Future<vector<string>> layers = store.get()->get(image); AWAIT_READY(layers); EXPECT_EQ(1u, layers.get().size()); ASSERT_SOME(os::realpath(imagePath)); EXPECT_EQ( os::realpath(path::join(imagePath, "rootfs")).get(), layers.get().front()); }
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus
// sending its tasks to LOST. At this point, when the partition is
// removed, the slave may attempt to send exited executor messages if
// it was unaware that the master removed it. We've already
// notified frameworks that the tasks under the executors were LOST,
// so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveExitedExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This allows us to have the slave send an
  // ExitedExecutorMessage.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop all the status updates from the slave, so that we can
  // ensure the ExitedExecutorMessage is what triggers the slave
  // shutdown.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get());

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Drop the first shutdown message from the master (simulated
  // partition) and allow the second shutdown message to pass when
  // triggered by the ExitedExecutorMessage.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
    Clock::settle();
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);
  Clock::settle();

  // The master will have notified the framework of the lost task.
AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Induce an ExitedExecutorMessage from the slave. containerizer.destroy( frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id()); // Upon receiving the message, the master will shutdown the slave. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that a provisioner can recover the rootfs // provisioned by a previous provisioner and then destroy it. Note // that we use the copy backend in this test so Linux is not required. TEST_F(ProvisionerAppcTest, Recover) { // Create provisioner. slave::Flags flags; flags.image_providers = "APPC"; flags.appc_store_dir = path::join(os::getcwd(), "store"); flags.image_provisioner_backend = "copy"; flags.work_dir = "work_dir"; Fetcher fetcher; Try<Owned<Provisioner>> provisioner1 = Provisioner::create(flags, &fetcher); ASSERT_SOME(provisioner1); // Create a simple image in the store: // <store> // |--images // |--<id> // |--manifest // |--rootfs/tmp/test JSON::Value manifest = JSON::parse( "{" " \"acKind\": \"ImageManifest\"," " \"acVersion\": \"0.6.1\"," " \"name\": \"foo.com/bar\"" "}").get(); // The 'imageId' below has the correct format but it's not computed // by hashing the tarball of the image. It's OK here as we assume // the images under 'images' have passed such check when they are // downloaded and validated. string imageId = "sha512-e77d96aa0240eedf134b8c90baeaf76dca8e78691836301d7498c84020446042e" "797b296d6ab296e0954c2626bfb264322ebeb8f447dac4fac6511ea06bc61f0"; string imagePath = path::join(flags.appc_store_dir, "images", imageId); ASSERT_SOME(os::mkdir(path::join(imagePath, "rootfs", "tmp"))); ASSERT_SOME( os::write(path::join(imagePath, "rootfs", "tmp", "test"), "test")); ASSERT_SOME( os::write(path::join(imagePath, "manifest"), stringify(manifest))); // Recover. This is when the image in the store is loaded. AWAIT_READY(provisioner1.get()->recover({}, {})); Image image; image.mutable_appc()->set_name("foo.com/bar"); ContainerID containerId; containerId.set_value(UUID::random().toString()); Future<string> rootfs = provisioner1.get()->provision(containerId, image); AWAIT_READY(rootfs); // Create a new provisioner to recover the state from the container. Try<Owned<Provisioner>> provisioner2 = Provisioner::create(flags, &fetcher); ASSERT_SOME(provisioner2); mesos::slave::ContainerState state; // Here we are using an ExecutorInfo in the ContainerState without a // ContainerInfo. This is the situation where the Image is specified // via --default_container_info so it's not part of the recovered // ExecutorInfo. state.mutable_container_id()->CopyFrom(containerId); AWAIT_READY(provisioner2.get()->recover({state}, {})); // It's possible for the user to provision two different rootfses // from the same image. AWAIT_READY(provisioner2.get()->provision(containerId, image)); string provisionerDir = slave::paths::getProvisionerDir(flags.work_dir); string containerDir = slave::provisioner::paths::getContainerDir( provisionerDir, containerId); Try<hashmap<string, hashset<string>>> rootfses = slave::provisioner::paths::listContainerRootfses( provisionerDir, containerId); ASSERT_SOME(rootfses); // Verify that the rootfs is successfully provisioned. ASSERT_TRUE(rootfses->contains(flags.image_provisioner_backend)); EXPECT_EQ(2u, rootfses->get(flags.image_provisioner_backend)->size()); Future<bool> destroy = provisioner2.get()->destroy(containerId); AWAIT_READY(destroy); EXPECT_TRUE(destroy.get()); // The container directory is successfully cleaned up. EXPECT_FALSE(os::exists(containerDir)); }
TEST(HTTPConnectionTest, Serial) { Http http; http::URL url = http::URL( "http", http.process->self().address.ip, http.process->self().address.port, http.process->self().id + "/get"); Future<http::Connection> connect = http::connect(url); AWAIT_READY(connect); http::Connection connection = connect.get(); // First test a regular (non-streaming) request. Promise<http::Response> promise1; Future<http::Request> get1; EXPECT_CALL(*http.process, get(_)) .WillOnce(DoAll(FutureArg<0>(&get1), Return(promise1.future()))); http::Request request1; request1.method = "GET"; request1.url = url; request1.body = "1"; request1.keepAlive = true; Future<http::Response> response1 = connection.send(request1); AWAIT_READY(get1); EXPECT_EQ("1", get1->body); promise1.set(http::OK("1")); AWAIT_EXPECT_RESPONSE_BODY_EQ("1", response1); // Now test a streaming response. Promise<http::Response> promise2; Future<http::Request> get2; EXPECT_CALL(*http.process, get(_)) .WillOnce(DoAll(FutureArg<0>(&get2), Return(promise2.future()))); http::Request request2 = request1; request2.body = "2"; Future<http::Response> response2 = connection.send(request2, true); AWAIT_READY(get2); EXPECT_EQ("2", get2->body); promise2.set(http::OK("2")); AWAIT_READY(response2); ASSERT_SOME(response2->reader); http::Pipe::Reader reader = response2->reader.get(); AWAIT_EQ("2", reader.read()); AWAIT_EQ("", reader.read()); // Disconnect. AWAIT_READY(connection.disconnect()); AWAIT_READY(connection.disconnected()); // After disconnection, sends should fail. AWAIT_FAILED(connection.send(request1)); }
// Master contention and detection fail when the network is down, and
// they recover when the network is back up.
TEST_F(ZooKeeperMasterContenderDetectorTest, ContenderDetectorShutdownNetwork)
{
  Clock::pause();

  Try<zookeeper::URL> url = zookeeper::URL::parse(
      "zk://" + server->connectString() + "/mesos");

  ASSERT_SOME(url);

  ZooKeeperMasterContender contender(url.get());

  PID<Master> pid;
  pid.node.ip = 10000000;
  pid.node.port = 10000;

  MasterInfo master = internal::protobuf::createMasterInfo(pid);

  contender.initialize(master);

  Future<Future<Nothing> > contended = contender.contend();
  AWAIT_READY(contended);
  Future<Nothing> lostCandidacy = contended.get();

  ZooKeeperMasterDetector detector(url.get());

  Future<Option<MasterInfo> > leader = detector.detect();
  AWAIT_READY(leader);
  EXPECT_SOME_EQ(master, leader.get());

  leader = detector.detect(leader.get());

  // Shut down ZooKeeper and expect things to fail after the timeout.
  server->shutdownNetwork();

  // We may need to advance multiple times because we could have
  // advanced the clock before the timer in Group starts.
  while (lostCandidacy.isPending() || leader.isPending()) {
    Clock::advance(MASTER_CONTENDER_ZK_SESSION_TIMEOUT);
    Clock::settle();
  }

  // A local timeout does not fail the future but rather deems that the
  // session has timed out and the candidacy is lost.
  EXPECT_TRUE(lostCandidacy.isReady());
  EXPECT_NONE(leader.get());

  // Re-contend and re-detect.
  contended = contender.contend();
  leader = detector.detect(leader.get());

  // Things will not change until the server restarts.
  Clock::advance(Minutes(1));
  Clock::settle();
  EXPECT_TRUE(contended.isPending());
  EXPECT_TRUE(leader.isPending());

  server->startNetwork();

  // Operations will eventually succeed after ZK is restored.
  AWAIT_READY(contended);
  AWAIT_READY(leader);

  Clock::resume();
}
TEST(HTTPConnectionTest, Pipeline)
{
  // We use two Processes here to ensure that libprocess performs
  // pipelining correctly when requests on a single connection
  // are going to different Processes.
  Http http1, http2;

  http::URL url1 = http::URL(
      "http",
      http1.process->self().address.ip,
      http1.process->self().address.port,
      http1.process->self().id + "/get");

  http::URL url2 = http::URL(
      "http",
      http2.process->self().address.ip,
      http2.process->self().address.port,
      http2.process->self().id + "/get");

  Future<http::Connection> connect = http::connect(url1);
  AWAIT_READY(connect);

  http::Connection connection = connect.get();

  // Send three pipelined requests.
  Promise<http::Response> promise1, promise2, promise3;
  Future<http::Request> get1, get2, get3;

  EXPECT_CALL(*http1.process, get(_))
    .WillOnce(DoAll(FutureArg<0>(&get1),
                    Return(promise1.future())))
    .WillOnce(DoAll(FutureArg<0>(&get3),
                    Return(promise3.future())));

  EXPECT_CALL(*http2.process, get(_))
    .WillOnce(DoAll(FutureArg<0>(&get2),
                    Return(promise2.future())));

  http::Request request1, request2, request3;

  request1.method = "GET";
  request2.method = "GET";
  request3.method = "GET";

  request1.url = url1;
  request2.url = url2;
  request3.url = url1;

  request1.body = "1";
  request2.body = "2";
  request3.body = "3";

  request1.keepAlive = true;
  request2.keepAlive = true;
  request3.keepAlive = true;

  Future<http::Response> response1 = connection.send(request1);
  Future<http::Response> response2 = connection.send(request2, true);
  Future<http::Response> response3 = connection.send(request3);

  // Ensure the requests are all received before any
  // responses have been sent.
  AWAIT_READY(get1);
  AWAIT_READY(get2);
  AWAIT_READY(get3);

  EXPECT_EQ("1", get1->body);
  EXPECT_EQ("2", get2->body);
  EXPECT_EQ("3", get3->body);

  // Complete the responses in the opposite order, and ensure
  // that the pipelining in libprocess sends the responses in
  // the same order as the requests were received.
  promise3.set(http::OK("3"));
  promise2.set(http::OK("2"));

  EXPECT_TRUE(response1.isPending());
  EXPECT_TRUE(response2.isPending());
  EXPECT_TRUE(response3.isPending());

  promise1.set(http::OK("1"));

  AWAIT_READY(response1);
  AWAIT_READY(response2);
  AWAIT_READY(response3);

  EXPECT_EQ("1", response1->body);

  ASSERT_SOME(response2->reader);
  http::Pipe::Reader reader = response2->reader.get();
  AWAIT_EQ("2", reader.read());
  AWAIT_EQ("", reader.read());

  EXPECT_EQ("3", response3->body);

  // Disconnect.
  AWAIT_READY(connection.disconnect());
  AWAIT_READY(connection.disconnected());

  // After disconnection, sends should fail.
  AWAIT_FAILED(connection.send(request1));
}
// Tests whether a leading master correctly detects a new master when
// its ZooKeeper session is expired (the follower becomes the new
// leader).
TEST_F(ZooKeeperMasterContenderDetectorTest,
       MasterDetectorExpireMasterZKSession)
{
  // Simulate a leading master.
  Try<zookeeper::URL> url = zookeeper::URL::parse(
      "zk://" + server->connectString() + "/mesos");

  ASSERT_SOME(url);

  PID<Master> pid;
  pid.node.ip = 10000000;
  pid.node.port = 10000;

  MasterInfo leader = internal::protobuf::createMasterInfo(pid);

  // Create the group instance so we can expire its session.
  Owned<zookeeper::Group> group(
      new Group(url.get(), MASTER_CONTENDER_ZK_SESSION_TIMEOUT));

  ZooKeeperMasterContender leaderContender(group);
  leaderContender.initialize(leader);

  Future<Future<Nothing> > leaderContended = leaderContender.contend();
  AWAIT_READY(leaderContended);

  Future<Nothing> leaderLostLeadership = leaderContended.get();

  ZooKeeperMasterDetector leaderDetector(url.get());

  Future<Option<MasterInfo> > detected = leaderDetector.detect();
  AWAIT_READY(detected);
  EXPECT_SOME_EQ(leader, detected.get());

  // Keep detecting.
  Future<Option<MasterInfo> > newLeaderDetected =
    leaderDetector.detect(detected.get());

  // Simulate a following master.
  PID<Master> pid2;
  pid2.node.ip = 10000001;
  pid2.node.port = 10001;

  MasterInfo follower = internal::protobuf::createMasterInfo(pid2);

  ZooKeeperMasterDetector followerDetector(url.get());
  ZooKeeperMasterContender followerContender(url.get());
  followerContender.initialize(follower);

  Future<Future<Nothing> > followerContended = followerContender.contend();
  AWAIT_READY(followerContended);

  LOG(INFO) << "The follower is now detecting the leader";
  detected = followerDetector.detect(None());
  AWAIT_READY(detected);
  EXPECT_SOME_EQ(leader, detected.get());

  // Now expire the leader's ZK session.
  Future<Option<int64_t> > session = group->session();
  AWAIT_READY(session);
  EXPECT_SOME(session.get());

  LOG(INFO) << "Now expire the ZK session: "
            << std::hex << session.get().get();

  server->expireSession(session.get().get());

  AWAIT_READY(leaderLostLeadership);

  // Wait for session expiration and ensure the former leader detects
  // a new leader.
  AWAIT_READY(newLeaderDetected);
  EXPECT_SOME(newLeaderDetected.get());
  EXPECT_EQ(follower, newLeaderDetected.get().get());
}
// Tests that the logrotate container logger only closes FDs when it
// is supposed to and does not interfere with other FDs on the agent.
TEST_F(ContainerLoggerTest, LOGROTATE_ModuleFDOwnership)
{
  // Create a master, agent, and framework.
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  // We'll need access to these flags later.
  slave::Flags flags = CreateSlaveFlags();

  // Use the non-default container logger that rotates logs.
  flags.container_logger = LOGROTATE_CONTAINER_LOGGER_NAME;

  Fetcher fetcher(flags);

  // We use an actual containerizer + executor since we want something to run.
  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, false, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  // Wait for an offer, and start a task.
  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();
  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // Start a task that will keep running until the end of the test.
  TaskInfo task = createTask(offers.get()[0], "sleep 100");

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusKilled;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusKilled))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Open multiple files, so that we're fairly certain we've opened
  // the same FDs (integers) opened by the container logger.
  vector<int> fds;
  for (int i = 0; i < 50; i++) {
    Try<int> fd = os::open(os::DEV_NULL, O_RDONLY);
    ASSERT_SOME(fd);
    fds.push_back(fd.get());
  }

  // Kill the task, which also kills the executor.
  driver.killTask(statusRunning->task_id());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  Future<Nothing> executorTerminated =
    FUTURE_DISPATCH(_, &Slave::executorTerminated);

  AWAIT_READY(executorTerminated);

  // Close all the FDs we opened. Every `close` should succeed.
  foreach (int fd, fds) {
    ASSERT_SOME(os::close(fd));
  }

  driver.stop();
  driver.join();
}
// Checks that in the event of a master failure and the election of a
// new master, if a slave reregisters before a framework that has
// resources on it reregisters, all used and unused resources are
// accounted for correctly.
TYPED_TEST(AllocatorZooKeeperTest, SlaveReregistersFirst)
{
  TypeParam allocator1;

  Try<PID<Master> > master = this->StartMaster(&allocator1);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = this->CreateSlaveFlags();
  flags.resources = Option<string>("cpus:2;mem:1024");

  Try<PID<Slave> > slave = this->StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, stringify(this->url.get()));

  EXPECT_CALL(sched, registered(&driver, _, _));

  // The framework should be offered all of the resources on the slave
  // since it is the only framework running.
  EXPECT_CALL(sched, resourceOffers(&driver, OfferEq(2, 1024)))
    .WillOnce(LaunchTasks(1, 1, 500, "*"))
    .WillRepeatedly(DeclineOffers());

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Stop the failing master from telling the slave to shut down when
  // it is killed.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, _);

  // Stop the framework from reregistering with the new master until the
  // slave has reregistered.
  DROP_PROTOBUFS(ReregisterFrameworkMessage(), _, _);

  // Shutting down the masters will cause the scheduler to get
  // disconnected.
  EXPECT_CALL(sched, disconnected(_));

  // Shutting down the masters will also cause the slave to shut down
  // frameworks that are not checkpointing, thus causing the executor
  // to get shut down.
  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  this->ShutdownMasters();
  AWAIT_READY(shutdownMessage);

  MockAllocatorProcess<TypeParam> allocator2;
  EXPECT_CALL(allocator2, initialize(_, _, _));

  Try<PID<Master> > master2 = this->StartMaster(&allocator2);
  ASSERT_SOME(master2);

  Future<Nothing> slaveAdded;
  EXPECT_CALL(allocator2, slaveAdded(_, _, _))
    .WillOnce(DoAll(InvokeSlaveAdded(&allocator2),
                    FutureSatisfy(&slaveAdded)));

  EXPECT_CALL(sched, reregistered(&driver, _));

  AWAIT_READY(slaveAdded);

  EXPECT_CALL(allocator2, frameworkAdded(_, _, _));

  Future<vector<Offer> > resourceOffers2;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&resourceOffers2));

  // We kill the filter so that ReregisterFrameworkMessages can get
  // to the master now that the framework has been added, ensuring
  // that the framework reregisters after the slave.
  process::filter(NULL);

  AWAIT_READY(resourceOffers2);

  // Since the task is still running on the slave, the framework
  // should only be offered the resources not being used by the task.
  EXPECT_THAT(resourceOffers2.get(), OfferEq(1, 524));

  // Shut everything down.
  EXPECT_CALL(allocator2, resourcesRecovered(_, _, _))
    .WillRepeatedly(DoDefault());

  EXPECT_CALL(allocator2, frameworkDeactivated(_))
    .Times(AtMost(1));

  EXPECT_CALL(allocator2, frameworkRemoved(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  EXPECT_CALL(allocator2, slaveRemoved(_))
    .Times(AtMost(1));

  this->Shutdown();
}
Try<Nothing> Benchmark::execute(int argc, char** argv)
{
  flags.setUsageMessage(
      "Usage: " + name() + " [options]\n"
      "\n"
      "This command is used to run a performance test on the\n"
      "replicated log. It takes a trace file of write sizes\n"
      "and replays that trace to measure the latency of each\n"
      "write. The data to be written for each write can be\n"
      "specified using the --type flag.\n"
      "\n");

  // Configure the tool by parsing command line arguments.
  if (argc > 0 && argv != NULL) {
    Try<Nothing> load = flags.load(None(), argc, argv);
    if (load.isError()) {
      return Error(flags.usage(load.error()));
    }

    if (flags.help) {
      return Error(flags.usage());
    }

    process::initialize();
    logging::initialize(argv[0], flags);
  }

  if (flags.quorum.isNone()) {
    return Error(flags.usage("Missing required option --quorum"));
  }

  if (flags.path.isNone()) {
    return Error(flags.usage("Missing required option --path"));
  }

  if (flags.servers.isNone()) {
    return Error(flags.usage("Missing required option --servers"));
  }

  if (flags.znode.isNone()) {
    return Error(flags.usage("Missing required option --znode"));
  }

  if (flags.input.isNone()) {
    return Error(flags.usage("Missing required option --input"));
  }

  if (flags.output.isNone()) {
    return Error(flags.usage("Missing required option --output"));
  }

  // Initialize the log.
  if (flags.initialize) {
    Initialize initialize;
    initialize.flags.path = flags.path;

    Try<Nothing> execution = initialize.execute();
    if (execution.isError()) {
      return Error(execution.error());
    }
  }

  // Create the log.
  Log log(
      flags.quorum.get(),
      flags.path.get(),
      flags.servers.get(),
      Seconds(10),
      flags.znode.get());

  // Create the log writer.
  Log::Writer writer(&log);

  Future<Option<Log::Position> > position = writer.start();

  if (!position.await(Seconds(15))) {
    return Error("Failed to start a log writer: timed out");
  } else if (!position.isReady()) {
    return Error("Failed to start a log writer: " +
                 (position.isFailed()
                  ? position.failure()
                  : "Discarded future"));
  }

  // Statistics to output.
  vector<Bytes> sizes;
  vector<Duration> durations;
  vector<Time> timestamps;

  // Read sizes from the input trace file.
  ifstream input(flags.input.get().c_str());
  if (!input.is_open()) {
    return Error("Failed to open the trace file " + flags.input.get());
  }

  string line;
  while (getline(input, line)) {
    Try<Bytes> size = Bytes::parse(strings::trim(line));
    if (size.isError()) {
      return Error("Failed to parse the trace file: " + size.error());
    }
    sizes.push_back(size.get());
  }

  input.close();

  // Generate the data to be written.
  vector<string> data;
  for (size_t i = 0; i < sizes.size(); i++) {
    if (flags.type == "one") {
      data.push_back(string(sizes[i].bytes(), static_cast<char>(0xff)));
    } else if (flags.type == "random") {
      data.push_back(string(sizes[i].bytes(), ::random() % 256));
    } else {
      data.push_back(string(sizes[i].bytes(), 0));
    }
  }

  Stopwatch stopwatch;
  stopwatch.start();

  for (size_t i = 0; i < sizes.size(); i++) {
    Stopwatch stopwatch;
    stopwatch.start();

    position = writer.append(data[i]);

    if (!position.await(Seconds(10))) {
      return Error("Failed to append: timed out");
    } else if (!position.isReady()) {
      return Error("Failed to append: " +
                   (position.isFailed()
                    ? position.failure()
                    : "Discarded future"));
    } else if (position.get().isNone()) {
      return Error("Failed to append: exclusive write promise lost");
    }

    durations.push_back(stopwatch.elapsed());
    timestamps.push_back(Clock::now());
  }

  cout << "Total number of appends: " << sizes.size() << endl;
  cout << "Total time used: " << stopwatch.elapsed() << endl;

  // Output statistics.
  ofstream output(flags.output.get().c_str());
  if (!output.is_open()) {
    return Error("Failed to open the output file " + flags.output.get());
  }

  for (size_t i = 0; i < sizes.size(); i++) {
    output << timestamps[i]
           << " Appended " << sizes[i].bytes() << " bytes"
           << " in " << durations[i].ms() << " ms"
           << endl;
  }

  return Nothing();
}
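

// A hypothetical invocation of this benchmark (illustration only: the binary
// name, paths, and hosts below are assumptions; the flags are the ones parsed
// above):
//
//   mesos-log benchmark \
//     --quorum=2 \
//     --path=/tmp/replica \
//     --servers=localhost:2181 \
//     --znode=/mesos/log \
//     --input=trace.txt \
//     --output=results.txt \
//     --type=random
//
// Each line of the --input trace is a write size (e.g. "1KB"); each line of
// the --output file records the timestamp, size, and latency of one append.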
// This test verifies that the image specified in the volume will be
// properly provisioned and mounted into the container if the container
// root filesystem is not specified.
TEST_P(VolumeImageIsolatorTest, ROOT_ImageInVolumeWithoutRootFilesystem)
{
  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "filesystem/linux,volume/image,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  ContainerID containerId;
  containerId.set_value(id::UUID::random().toString());

  ContainerInfo container = createContainerInfo(
      None(),
      {createVolumeFromDockerImage("rootfs", "test_image", Volume::RW)});

  CommandInfo command = createCommandInfo("test -d rootfs/bin");

  ExecutorInfo executor = createExecutorInfo(
      "test_executor",
      nesting ? createCommandInfo("sleep 1000") : command);

  if (!nesting) {
    executor.mutable_container()->CopyFrom(container);
  }

  string directory = path::join(flags.work_dir, "sandbox");
  ASSERT_SOME(os::mkdir(directory));

  Future<Containerizer::LaunchResult> launch = containerizer->launch(
      containerId,
      createContainerConfig(None(), executor, directory),
      map<string, string>(),
      None());

  AWAIT_ASSERT_EQ(Containerizer::LaunchResult::SUCCESS, launch);

  Future<Option<ContainerTermination>> wait = containerizer->wait(containerId);

  if (nesting) {
    ContainerID nestedContainerId;
    nestedContainerId.mutable_parent()->CopyFrom(containerId);
    nestedContainerId.set_value(id::UUID::random().toString());

    launch = containerizer->launch(
        nestedContainerId,
        createContainerConfig(command, container),
        map<string, string>(),
        None());

    AWAIT_ASSERT_EQ(Containerizer::LaunchResult::SUCCESS, launch);

    wait = containerizer->wait(nestedContainerId);
  }

  AWAIT_READY(wait);
  ASSERT_SOME(wait.get());
  ASSERT_TRUE(wait->get().has_status());
  EXPECT_WEXITSTATUS_EQ(0, wait->get().status());

  if (nesting) {
    Future<Option<ContainerTermination>> termination =
      containerizer->destroy(containerId);

    AWAIT_READY(termination);
    ASSERT_SOME(termination.get());
    ASSERT_TRUE(termination->get().has_status());
    EXPECT_WTERMSIG_EQ(SIGKILL, termination.get()->status());
  }
}
TEST_F(GarbageCollectorIntegrationTest, DiskUsage)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Resources resources = Resources::parse(flags.resources.get()).get();
  double cpus = resources.get<Value::Scalar>("cpus").get().value();
  double mem = resources.get<Value::Scalar>("mem").get().value();

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, cpus, mem, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  const std::string& executorDir = slave::paths::getExecutorPath(
      flags.work_dir, slaveId, frameworkId.get(), DEFAULT_EXECUTOR_ID);

  ASSERT_TRUE(os::exists(executorDir));

  Clock::pause();

  // Killing the executor will cause the slave to schedule its
  // directory to get garbage collected.
  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Future<Nothing> schedule =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  EXPECT_CALL(sched, statusUpdate(_, _))
    .Times(AtMost(1)); // Ignore TASK_LOST from killed executor.

  // Kill the executor and inform the slave.
  containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);

  AWAIT_READY(schedule);

  Clock::settle(); // Wait for GarbageCollectorProcess::schedule to complete.

  // We advance the clock here so that the 'removalTime' of the
  // executor directory is definitely less than 'flags.gc_delay' in
  // the GarbageCollectorProcess when 'GarbageCollector::prune()' gets
  // called (below). Otherwise, due to double comparison precision
  // in 'prune()' the directory might not be deleted.
  Clock::advance(Seconds(1));

  Future<Nothing> _checkDiskUsage =
    FUTURE_DISPATCH(_, &Slave::_checkDiskUsage);

  // Simulate a disk full message to the slave.
  process::dispatch(
      slave.get(),
      &Slave::_checkDiskUsage,
      Try<double>(1.0 - slave::GC_DISK_HEADROOM));

  AWAIT_READY(_checkDiskUsage);

  Clock::settle(); // Wait for Slave::_checkDiskUsage to complete.

  // Executor's directory should be gc'ed by now.
  ASSERT_FALSE(os::exists(executorDir));

  process::UPID files("files", process::node());
  AWAIT_EXPECT_RESPONSE_STATUS_EQ(
      process::http::NotFound().status,
      process::http::get(files, "browse.json", "path=" + executorDir));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'isolator' gets deallocated.
}