Beispiel #1
0
void createServerInstanceDirAndGeneration(ServerInstanceDirPtr &serverInstanceDir,
                                          ServerInstanceDir::GenerationPtr &generation)
{
	serverInstanceDir.reset(new ServerInstanceDir(getpid()));
	generation = serverInstanceDir->newGeneration(geteuid() == 0,
		"nobody", getPrimaryGroupName("nobody"),
		geteuid(), getegid());
}
Beispiel #2
0
int
main(int argc, char *argv[]) {
	disableOomKiller();
	
	agentsOptions = initializeAgent(argc, argv, "PassengerWatchdog");
	logLevel      = agentsOptions.getInt("log_level");
	webServerPid  = agentsOptions.getPid("web_server_pid");
	tempDir       = agentsOptions.get("temp_dir");
	userSwitching = agentsOptions.getBool("user_switching");
	defaultUser   = agentsOptions.get("default_user");
	defaultGroup  = agentsOptions.get("default_group");
	webServerWorkerUid = agentsOptions.getUid("web_server_worker_uid");
	webServerWorkerGid = agentsOptions.getGid("web_server_worker_gid");
	passengerRoot = agentsOptions.get("passenger_root");
	rubyCommand   = agentsOptions.get("ruby");
	maxPoolSize        = agentsOptions.getInt("max_pool_size");
	maxInstancesPerApp = agentsOptions.getInt("max_instances_per_app");
	poolIdleTime       = agentsOptions.getInt("pool_idle_time");
	serializedPrestartURLs  = agentsOptions.get("prestart_urls");
	
	try {
		randomGenerator = new RandomGenerator();
		errorEvent = new EventFd();
		
		MessageChannel feedbackChannel(FEEDBACK_FD);
		serverInstanceDir.reset(new ServerInstanceDir(webServerPid, tempDir));
		generation = serverInstanceDir->newGeneration(userSwitching, defaultUser,
			defaultGroup, webServerWorkerUid, webServerWorkerGid);
		agentsOptions.set("server_instance_dir", serverInstanceDir->getPath());
		agentsOptions.setInt("generation_number", generation->getNumber());
		
		ServerInstanceDirToucher serverInstanceDirToucher;
		ResourceLocator resourceLocator(passengerRoot);
		if (agentsOptions.get("analytics_server", false).empty()) {
			// Using local, server instance specific logging agent.
			loggingAgentAddress  = "unix:" + generation->getPath() + "/logging.socket";
			loggingAgentPassword = randomGenerator->generateAsciiString(64);
		} else {
			// Using remote logging agent.
			loggingAgentAddress = agentsOptions.get("analytics_server");
		}
		
		HelperAgentWatcher helperAgentWatcher(resourceLocator);
		LoggingAgentWatcher loggingAgentWatcher(resourceLocator);
		
		vector<AgentWatcher *> watchers;
		vector<AgentWatcher *>::iterator it;
		watchers.push_back(&helperAgentWatcher);
		if (agentsOptions.get("analytics_server", false).empty()) {
			watchers.push_back(&loggingAgentWatcher);
		}
		
		for (it = watchers.begin(); it != watchers.end(); it++) {
			try {
				(*it)->start();
			} catch (const std::exception &e) {
				feedbackChannel.write("Watchdog startup error",
					e.what(), NULL);
				forceAllAgentsShutdown(watchers);
				return 1;
			}
			// Allow other exceptions to propagate and crash the watchdog.
		}
		for (it = watchers.begin(); it != watchers.end(); it++) {
			try {
				(*it)->startWatching();
			} catch (const std::exception &e) {
				feedbackChannel.write("Watchdog startup error",
					e.what(), NULL);
				forceAllAgentsShutdown(watchers);
				return 1;
			}
			// Allow other exceptions to propagate and crash the watchdog.
		}
		
		feedbackChannel.write("Basic startup info",
			serverInstanceDir->getPath().c_str(),
			toString(generation->getNumber()).c_str(),
			NULL);
		
		for (it = watchers.begin(); it != watchers.end(); it++) {
			(*it)->sendStartupInfo(feedbackChannel);
		}
		
		feedbackChannel.write("All agents started", NULL);
		
		this_thread::disable_interruption di;
		this_thread::disable_syscall_interruption dsi;
		bool exitGracefully = waitForStarterProcessOrWatchers(watchers);
		AgentWatcher::stopWatching(watchers);
		if (exitGracefully) {
			/* Fork a child process which cleans up all the agent processes in
			 * the background and exit this watchdog process so that we don't block
			 * the web server.
			 */
			cleanupAgentsInBackground(watchers);
			return 0;
		} else {
			P_DEBUG("Web server did not exit gracefully, forcing shutdown of all service processes...");
			forceAllAgentsShutdown(watchers);
			return 1;
		}
	} catch (const tracable_exception &e) {
		P_ERROR(e.what() << "\n" << e.backtrace());
		return 1;
	} catch (const std::exception &e) {
		P_ERROR(e.what());
		return 1;
	}
}
Beispiel #3
0
	static void
	threadMain() {
		while (!this_thread::interruption_requested()) {
			syscalls::sleep(60 * 60 * 6);
			
			begin_touch:
			
			this_thread::disable_interruption di;
			this_thread::disable_syscall_interruption dsi;
			// Fork a process which touches everything in the server instance dir.
			pid_t pid = syscalls::fork();
			if (pid == 0) {
				// Child
				int prio, ret, e;
				
				closeAllFileDescriptors(2);
				
				// Make process nicer.
				do {
					prio = getpriority(PRIO_PROCESS, getpid());
				} while (prio == -1 && errno == EINTR);
				if (prio != -1) {
					prio++;
					if (prio > 20) {
						prio = 20;
					}
					do {
						ret = setpriority(PRIO_PROCESS, getpid(), prio);
					} while (ret == -1 && errno == EINTR);
				} else {
					perror("getpriority");
				}
				
				do {
					ret = chdir(serverInstanceDir->getPath().c_str());
				} while (ret == -1 && errno == EINTR);
				if (ret == -1) {
					e = errno;
					fprintf(stderr, "chdir(\"%s\") failed: %s (%d)\n",
						serverInstanceDir->getPath().c_str(),
						strerror(e), e);
					fflush(stderr);
					_exit(1);
				}
				
				execlp("/bin/sh", "/bin/sh", "-c", "find . | xargs touch", (char *) 0);
				e = errno;
				fprintf(stderr, "Cannot execute 'find . | xargs touch': %s (%d)\n",
					strerror(e), e);
				fflush(stderr);
				_exit(1);
			} else if (pid == -1) {
				// Error
				P_WARN("Could touch the server instance directory because "
					"fork() failed. Retrying in 2 minutes...");
				this_thread::restore_interruption si(di);
				this_thread::restore_syscall_interruption rsi(dsi);
				syscalls::sleep(60 * 2);
				goto begin_touch;
			} else {
				syscalls::waitpid(pid, NULL, 0);
			}
		}
	}
Beispiel #4
0
static void
cleanupAgentsInBackground(vector<AgentWatcher *> &watchers) {
	this_thread::disable_interruption di;
	this_thread::disable_syscall_interruption dsi;
	pid_t pid;
	int e;
	
	pid = fork();
	if (pid == 0) {
		// Child
		vector<AgentWatcher *>::const_iterator it;
		Timer timer(false);
		fd_set fds, fds2;
		int max, agentProcessesDone;
		unsigned long long deadline = 30000; // miliseconds
		
		// Wait until all agent processes have exited.
		
		max = 0;
		FD_ZERO(&fds);
		for (it = watchers.begin(); it != watchers.end(); it++) {
			FD_SET((*it)->getFeedbackFd(), &fds);
			if ((*it)->getFeedbackFd() > max) {
				max = (*it)->getFeedbackFd();
			}
		}
		
		timer.start();
		agentProcessesDone = 0;
		while (agentProcessesDone != -1
		    && agentProcessesDone < (int) watchers.size()
		    && timer.elapsed() < deadline)
		{
			struct timeval timeout;
			
			#ifdef FD_COPY
				FD_COPY(&fds, &fds2);
			#else
				FD_ZERO(&fds2);
				for (it = watchers.begin(); it != watchers.end(); it++) {
					FD_SET((*it)->getFeedbackFd(), &fds2);
				}
			#endif
			
			timeout.tv_sec = 0;
			timeout.tv_usec = 10000;
			agentProcessesDone = syscalls::select(max + 1, &fds2, NULL, NULL, &timeout);
			if (agentProcessesDone > 0 && timer.elapsed() < deadline) {
				usleep(10000);
			}
		}
		
		if (agentProcessesDone == -1 || timer.elapsed() >= deadline) {
			// An error occurred or we've waited long enough. Kill all the
			// processes.
			P_WARN("Some Phusion Passenger agent processes did not exit " <<
				"in time, forcefully shutting down all.");
			for (it = watchers.begin(); it != watchers.end(); it++) {
				(*it)->forceShutdown();
			}
		} else {
			P_DEBUG("All Phusion Passenger agent processes have exited.");
		}
		
		// Now clean up the server instance directory.
		delete generation.get();
		delete serverInstanceDir.get();
		
		_exit(0);
		
	} else if (pid == -1) {
		// Error
		e = errno;
		throw SystemException("fork() failed", errno);
		
	} else {
		// Parent
		
		// Let child process handle cleanup.
		serverInstanceDir->detach();
		generation->detach();
	}
}
Beispiel #5
0
int
main(int argc, char *argv[]) {
	/*
	 * Some Apache installations (like on OS X) redirect stdout to /dev/null,
	 * so that only stderr is redirected to the log file. We therefore
	 * forcefully redirect stdout to stderr so that everything ends up in the
	 * same place.
	 */
	dup2(2, 1);

	/*
	 * Most operating systems overcommit memory. We *know* that this watchdog process
	 * doesn't use much memory; on OS X it uses about 200 KB of private RSS. If the
	 * watchdog is killed by the system Out-Of-Memory Killer or then it's all over:
	 * the system administrator will have to restart the web server for Phusion
	 * Passenger to be usable again. So here we disable Linux's OOM killer
	 * for this watchdog. Note that the OOM score is inherited by child processes
	 * so we need to restore it after each fork().
	 */
	oldOomScore = setOomScoreNeverKill();
	
	agentsOptions = initializeAgent(argc, argv, "PassengerWatchdog");
	agentsOptions
		.setDefaultInt ("log_level", DEFAULT_LOG_LEVEL)
		.setDefault    ("temp_dir", getSystemTempDir())

		.setDefaultBool("user_switching", true)
		.setDefault    ("default_user", DEFAULT_WEB_APP_USER)
		.setDefaultUid ("web_server_worker_uid", getuid())
		.setDefaultGid ("web_server_worker_gid", getgid())
		.setDefault    ("ruby", DEFAULT_RUBY)
		.setDefault    ("python", DEFAULT_PYTHON)
		.setDefaultInt ("max_pool_size", DEFAULT_MAX_POOL_SIZE)
		.setDefaultInt ("max_instances_per_app", DEFAULT_MAX_INSTANCES_PER_APP)
		.setDefaultInt ("pool_idle_time", DEFAULT_POOL_IDLE_TIME);

	P_DEBUG("Starting Watchdog...");
	
	try {
		TRACE_POINT();
		// Required options
		passengerRoot = agentsOptions.get("passenger_root");
		webServerPid  = agentsOptions.getPid("web_server_pid");

		// Optional options
		UPDATE_TRACE_POINT();
		tempDir       = agentsOptions.get("temp_dir");
		userSwitching = agentsOptions.getBool("user_switching");
		defaultUser   = agentsOptions.get("default_user");
		if (!agentsOptions.has("default_group")) {
			agentsOptions.set("default_group", inferDefaultGroup(defaultUser));
		}
		defaultGroup       = agentsOptions.get("default_group");
		webServerWorkerUid = agentsOptions.getUid("web_server_worker_uid");
		webServerWorkerGid = agentsOptions.getGid("web_server_worker_gid");

		UPDATE_TRACE_POINT();
		randomGenerator = new RandomGenerator();
		errorEvent = new EventFd();
		
		UPDATE_TRACE_POINT();
		serverInstanceDir.reset(new ServerInstanceDir(webServerPid, tempDir));
		generation = serverInstanceDir->newGeneration(userSwitching, defaultUser,
			defaultGroup, webServerWorkerUid, webServerWorkerGid);
		agentsOptions.set("server_instance_dir", serverInstanceDir->getPath());
		agentsOptions.setInt("generation_number", generation->getNumber());
		
		UPDATE_TRACE_POINT();
		ServerInstanceDirToucher serverInstanceDirToucher;
		ResourceLocator resourceLocator(passengerRoot);
		if (agentsOptions.get("analytics_server", false).empty()) {
			// Using local, server instance specific logging agent.
			loggingAgentAddress  = "unix:" + generation->getPath() + "/logging.socket";
			loggingAgentPassword = randomGenerator->generateAsciiString(64);
		} else {
			// Using remote logging agent.
			loggingAgentAddress = agentsOptions.get("analytics_server");
		}
		
		UPDATE_TRACE_POINT();
		shared_ptr<HelperAgentWatcher> helperAgentWatcher =
			make_shared<HelperAgentWatcher>(resourceLocator);
		shared_ptr<LoggingAgentWatcher> loggingAgentWatcher =
			make_shared<LoggingAgentWatcher>(resourceLocator);
		
		UPDATE_TRACE_POINT();
		vector<AgentWatcherPtr> watchers;
		vector<AgentWatcherPtr>::iterator it;
		watchers.push_back(helperAgentWatcher);
		if (agentsOptions.get("analytics_server", false).empty()) {
			watchers.push_back(loggingAgentWatcher);
		}
		
		UPDATE_TRACE_POINT();
		for (it = watchers.begin(); it != watchers.end(); it++) {
			try {
				(*it)->start();
			} catch (const std::exception &e) {
				writeArrayMessage(FEEDBACK_FD,
					"Watchdog startup error",
					e.what(),
					NULL);
				forceAllAgentsShutdown(watchers);
				return 1;
			}
			// Allow other exceptions to propagate and crash the watchdog.
		}
		UPDATE_TRACE_POINT();
		for (it = watchers.begin(); it != watchers.end(); it++) {
			try {
				(*it)->startWatching();
			} catch (const std::exception &e) {
				writeArrayMessage(FEEDBACK_FD,
					"Watchdog startup error",
					e.what(),
					NULL);
				forceAllAgentsShutdown(watchers);
				return 1;
			}
			// Allow other exceptions to propagate and crash the watchdog.
		}
		
		UPDATE_TRACE_POINT();
		writeArrayMessage(FEEDBACK_FD,
			"Basic startup info",
			serverInstanceDir->getPath().c_str(),
			toString(generation->getNumber()).c_str(),
			NULL);
		
		UPDATE_TRACE_POINT();
		for (it = watchers.begin(); it != watchers.end(); it++) {
			(*it)->sendStartupInfo(FEEDBACK_FD);
		}
		
		UPDATE_TRACE_POINT();
		writeArrayMessage(FEEDBACK_FD, "All agents started", NULL);
		P_DEBUG("All Phusion Passenger agents started!");
		
		this_thread::disable_interruption di;
		this_thread::disable_syscall_interruption dsi;
		UPDATE_TRACE_POINT();
		bool exitGracefully = waitForStarterProcessOrWatchers(watchers);
		if (exitGracefully) {
			/* Fork a child process which cleans up all the agent processes in
			 * the background and exit this watchdog process so that we don't block
			 * the web server.
			 */
			P_DEBUG("Web server exited gracefully; gracefully shutting down all agents...");
		} else {
			P_DEBUG("Web server did not exit gracefully, forcing shutdown of all agents...");
		}
		UPDATE_TRACE_POINT();
		AgentWatcher::stopWatching(watchers);
		if (exitGracefully) {
			UPDATE_TRACE_POINT();
			cleanupAgentsInBackground(watchers);
			return 0;
		} else {
			UPDATE_TRACE_POINT();
			forceAllAgentsShutdown(watchers);
			return 1;
		}
	} catch (const tracable_exception &e) {
		P_ERROR(e.what() << "\n" << e.backtrace());
		return 1;
	} catch (const std::exception &e) {
		P_ERROR(e.what());
		return 1;
	}
}