void
Group::onSessionInitiateFailure(const ProcessPtr &process, Session *session) {
	vector<Callback> actions;

	TRACE_POINT();
	// Standard resource management boilerplate stuff...
	PoolPtr pool = getPool();
	boost::unique_lock<boost::mutex> lock(pool->syncher);
	assert(process->isAlive());
	assert(isAlive() || getLifeStatus() == SHUTTING_DOWN);

	UPDATE_TRACE_POINT();
	P_DEBUG("Could not initiate a session with process " <<
		process->inspect() << ", detaching from pool if possible");
	if (!pool->detachProcessUnlocked(process, actions)) {
		P_DEBUG("Process was already detached");
	}
	pool->fullVerifyInvariants();
	lock.unlock();
	runAllActions(actions);
}
// The 'self' parameter is for keeping the current Group object alive while this thread is running.
void
Group::finalizeRestart(GroupPtr self, Options options, RestartMethod method,
	SpawnerFactoryPtr spawnerFactory, unsigned int restartsInitiated,
	vector<Callback> postLockActions)
{
	TRACE_POINT();

	Pool::runAllActions(postLockActions);
	postLockActions.clear();

	this_thread::disable_interruption di;
	this_thread::disable_syscall_interruption dsi;

	// Create a new spawner.
	SpawnerPtr newSpawner = spawnerFactory->create(options);
	SpawnerPtr oldSpawner;

	UPDATE_TRACE_POINT();
	PoolPtr pool = getPool();

	Pool::DebugSupportPtr debug = pool->debugSupport;
	if (debug != NULL && debug->restarting) {
		this_thread::restore_interruption ri(di);
		this_thread::restore_syscall_interruption rsi(dsi);
		this_thread::interruption_point();
		debug->debugger->send("About to end restarting");
		debug->messages->recv("Finish restarting");
	}

	ScopedLock l(pool->syncher);
	if (!isAlive()) {
		P_DEBUG("Group " << name << " is shutting down, so aborting restart");
		return;
	}
	if (restartsInitiated != this->restartsInitiated) {
		// Before this restart could be finalized, another restart command was given.
		// The spawner we just created might be out of date now so we abort.
		P_DEBUG("Restart of group " << name << " aborted because a new restart was initiated concurrently");
		if (debug != NULL && debug->restarting) {
			debug->debugger->send("Restarting aborted");
		}
		return;
	}

	// Run some sanity checks.
	pool->fullVerifyInvariants();
	assert(m_restarting);
	UPDATE_TRACE_POINT();
	
	// Atomically swap the new spawner with the old one.
	resetOptions(options);
	oldSpawner = spawner;
	spawner    = newSpawner;

	m_restarting = false;
	if (shouldSpawn()) {
		spawn();
	} else if (isWaitingForCapacity()) {
		P_INFO("Group " << name << " is waiting for capacity to become available. "
			"Trying to shutdown another idle process to free capacity...");
		if (pool->forceFreeCapacity(this, postLockActions) != NULL) {
			spawn();
		} else {
			P_INFO("There are no processes right now that are eligible "
				"for shutdown. Will try again later.");
		}
	}
	verifyInvariants();

	l.unlock();
	oldSpawner.reset();
	Pool::runAllActions(postLockActions);
	P_DEBUG("Restart of group " << name << " done");
	if (debug != NULL && debug->restarting) {
		debug->debugger->send("Restarting done");
	}
}
void
Group::spawnThreadRealMain(const SpawnerPtr &spawner, const Options &options, unsigned int restartsInitiated) {
	TRACE_POINT();
	this_thread::disable_interruption di;
	this_thread::disable_syscall_interruption dsi;

	PoolPtr pool = getPool();
	Pool::DebugSupportPtr debug = pool->debugSupport;
	
	bool done = false;
	while (!done) {
		bool shouldFail = false;
		if (debug != NULL && debug->spawning) {
			UPDATE_TRACE_POINT();
			this_thread::restore_interruption ri(di);
			this_thread::restore_syscall_interruption rsi(dsi);
			this_thread::interruption_point();
			string iteration;
			{
				LockGuard g(debug->syncher);
				debug->spawnLoopIteration++;
				iteration = toString(debug->spawnLoopIteration);
			}
			P_DEBUG("Begin spawn loop iteration " << iteration);
			debug->debugger->send("Begin spawn loop iteration " +
				iteration);
			
			vector<string> cases;
			cases.push_back("Proceed with spawn loop iteration " + iteration);
			cases.push_back("Fail spawn loop iteration " + iteration);
			MessagePtr message = debug->messages->recvAny(cases);
			shouldFail = message->name == "Fail spawn loop iteration " + iteration;
		}

		ProcessPtr process;
		ExceptionPtr exception;
		try {
			UPDATE_TRACE_POINT();
			this_thread::restore_interruption ri(di);
			this_thread::restore_syscall_interruption rsi(dsi);
			if (shouldFail) {
				throw SpawnException("Simulated failure");
			} else {
				process = spawner->spawn(options);
				process->setGroup(shared_from_this());
			}
		} catch (const thread_interrupted &) {
			break;
		} catch (const tracable_exception &e) {
			exception = copyException(e);
			// Let other (unexpected) exceptions crash the program so
			// gdb can generate a backtrace.
		}

		UPDATE_TRACE_POINT();
		ScopeGuard guard(boost::bind(Process::forceTriggerShutdownAndCleanup, process));
		boost::unique_lock<boost::mutex> lock(pool->syncher);

		if (!isAlive()) {
			if (process != NULL) {
				P_DEBUG("Group is being shut down so dropping process " <<
					process->inspect() << " which we just spawned and exiting spawn loop");
			} else {
				P_DEBUG("The group is being shut down. A process failed "
					"to be spawned anyway, so ignoring this error and exiting "
					"spawn loop");
			}
			// We stop immediately because any previously assumed invariants
			// may have been violated.
			break;
		} else if (restartsInitiated != this->restartsInitiated) {
			if (process != NULL) {
				P_DEBUG("A restart was issued for the group, so dropping process " <<
					process->inspect() << " which we just spawned and exiting spawn loop");
			} else {
				P_DEBUG("A restart was issued for the group. A process failed "
					"to be spawned anyway, so ignoring this error and exiting "
					"spawn loop");
			}
			// We stop immediately because any previously assumed invariants
			// may have been violated.
			break;
		}

		verifyInvariants();
		assert(m_spawning);
		assert(processesBeingSpawned > 0);

		processesBeingSpawned--;
		assert(processesBeingSpawned == 0);

		UPDATE_TRACE_POINT();
		vector<Callback> actions;
		if (process != NULL) {
			AttachResult result = attach(process, actions);
			if (result == AR_OK) {
				guard.clear();
				if (getWaitlist.empty()) {
					pool->assignSessionsToGetWaiters(actions);
				} else {
					assignSessionsToGetWaiters(actions);
				}
				P_DEBUG("New process count = " << enabledCount <<
					", remaining get waiters = " << getWaitlist.size());
			} else {
				done = true;
				P_DEBUG("Unable to attach spawned process " << process->inspect());
				if (result == AR_ANOTHER_GROUP_IS_WAITING_FOR_CAPACITY) {
					pool->possiblySpawnMoreProcessesForExistingGroups();
				}
			}
		} else {
			// TODO: sure this is the best thing? if there are
			// processes currently alive we should just use them.
			P_ERROR("Could not spawn process for group " << name <<
				": " << exception->what() << "\n" <<
				exception->backtrace());
			if (enabledCount == 0) {
				enableAllDisablingProcesses(actions);
			}
			Pool::assignExceptionToGetWaiters(getWaitlist, exception, actions);
			pool->assignSessionsToGetWaiters(actions);
			done = true;
		}

		done = done
			|| (processLowerLimitsSatisfied() && getWaitlist.empty())
			|| processUpperLimitsReached()
			|| pool->atFullCapacity(false);
		m_spawning = !done;
		if (done) {
			P_DEBUG("Spawn loop done");
		} else {
			processesBeingSpawned++;
			P_DEBUG("Continue spawning");
		}

		UPDATE_TRACE_POINT();
		pool->fullVerifyInvariants();
		lock.unlock();
		UPDATE_TRACE_POINT();
		runAllActions(actions);
		UPDATE_TRACE_POINT();
	}

	if (debug != NULL && debug->spawning) {
		debug->debugger->send("Spawn loop done");
	}
}
// The 'self' parameter is for keeping the current Group object alive while this thread is running.
void
Group::spawnThreadOOBWRequest(GroupPtr self, ProcessPtr process) {
	TRACE_POINT();
	this_thread::disable_interruption di;
	this_thread::disable_syscall_interruption dsi;

	Socket *socket;
	Connection connection;
	PoolPtr pool = getPool();
	Pool::DebugSupportPtr debug = pool->debugSupport;

	UPDATE_TRACE_POINT();
	P_DEBUG("Performing OOBW request for process " << process->inspect());
	if (debug != NULL && debug->oobw) {
		debug->debugger->send("OOBW request about to start");
		debug->messages->recv("Proceed with OOBW request");
	}
	
	UPDATE_TRACE_POINT();
	{
		// Standard resource management boilerplate stuff...
		boost::unique_lock<boost::mutex> lock(pool->syncher);
		if (OXT_UNLIKELY(!process->isAlive()
			|| process->enabled == Process::DETACHED
			|| !isAlive()))
		{
			return;
		}

		if (process->enabled != Process::DISABLED) {
			UPDATE_TRACE_POINT();
			P_INFO("Out-of-Band Work canceled: process " << process->inspect() <<
				" was concurrently re-enabled.");
			if (debug != NULL && debug->oobw) {
				debug->debugger->send("OOBW request canceled");
			}
			return;
		}
		
		assert(process->oobwStatus == Process::OOBW_IN_PROGRESS);
		assert(process->sessions == 0);
		socket = process->sessionSockets.top();
		assert(socket != NULL);
	}
	
	UPDATE_TRACE_POINT();
	unsigned long long timeout = 1000 * 1000 * 60; // 1 min
	try {
		this_thread::restore_interruption ri(di);
		this_thread::restore_syscall_interruption rsi(dsi);

		// Grab a connection. The connection is marked as fail in order to
		// ensure it is closed / recycled after this request (otherwise we'd
		// need to completely read the response).
		connection = socket->checkoutConnection();
		connection.fail = true;
		ScopeGuard guard(boost::bind(&Socket::checkinConnection, socket, connection));
		
		// This is copied from RequestHandler when it is sending data using the
		// "session" protocol.
		char sizeField[sizeof(uint32_t)];
		SmallVector<StaticString, 10> data;

		data.push_back(StaticString(sizeField, sizeof(uint32_t)));
		data.push_back(makeStaticStringWithNull("REQUEST_METHOD"));
		data.push_back(makeStaticStringWithNull("OOBW"));

		data.push_back(makeStaticStringWithNull("PASSENGER_CONNECT_PASSWORD"));
		data.push_back(makeStaticStringWithNull(process->connectPassword));

		uint32_t dataSize = 0;
		for (unsigned int i = 1; i < data.size(); i++) {
			dataSize += (uint32_t) data[i].size();
		}
		Uint32Message::generate(sizeField, dataSize);

		gatheredWrite(connection.fd, &data[0], data.size(), &timeout);

		// We do not care what the actual response is ... just wait for it.
		UPDATE_TRACE_POINT();
		waitUntilReadable(connection.fd, &timeout);
	} catch (const SystemException &e) {
		P_ERROR("*** ERROR: " << e.what() << "\n" << e.backtrace());
	} catch (const TimeoutException &e) {
		P_ERROR("*** ERROR: " << e.what() << "\n" << e.backtrace());
	}
	
	UPDATE_TRACE_POINT();
	vector<Callback> actions;
	{
		// Standard resource management boilerplate stuff...
		PoolPtr pool = getPool();
		boost::unique_lock<boost::mutex> lock(pool->syncher);
		if (OXT_UNLIKELY(!process->isAlive() || !isAlive())) {
			return;
		}
		
		process->oobwStatus = Process::OOBW_NOT_ACTIVE;
		if (process->enabled == Process::DISABLED) {
			enable(process, actions);
			assignSessionsToGetWaiters(actions);
		}

		pool->fullVerifyInvariants();

		initiateNextOobwRequest();
	}
	UPDATE_TRACE_POINT();
	runAllActions(actions);
	actions.clear();

	UPDATE_TRACE_POINT();
	P_DEBUG("Finished OOBW request for process " << process->inspect());
	if (debug != NULL && debug->oobw) {
		debug->debugger->send("OOBW request finished");
	}
}
void
Group::onSessionClose(const ProcessPtr &process, Session *session) {
	TRACE_POINT();
	// Standard resource management boilerplate stuff...
	PoolPtr pool = getPool();
	boost::unique_lock<boost::mutex> lock(pool->syncher);
	assert(process->isAlive());
	assert(isAlive() || getLifeStatus() == SHUTTING_DOWN);

	P_TRACE(2, "Session closed for process " << process->inspect());
	verifyInvariants();
	UPDATE_TRACE_POINT();
	
	/* Update statistics. */
	process->sessionClosed(session);
	assert(process->getLifeStatus() == Process::ALIVE);
	assert(process->enabled == Process::ENABLED
		|| process->enabled == Process::DISABLING
		|| process->enabled == Process::DETACHED);
	if (process->enabled == Process::ENABLED) {
		pqueue.decrease(process->pqHandle, process->busyness());
	}

	/* This group now has a process that's guaranteed to be not
	 * totally busy.
	 */
	assert(!process->isTotallyBusy());

	bool detachingBecauseOfMaxRequests = false;
	bool detachingBecauseCapacityNeeded = false;
	bool shouldDetach =
		( detachingBecauseOfMaxRequests = (
			options.maxRequests > 0
			&& process->processed >= options.maxRequests
		)) || (
			detachingBecauseCapacityNeeded = (
				process->sessions == 0
				&& getWaitlist.empty()
				&& (
					!pool->getWaitlist.empty()
					|| anotherGroupIsWaitingForCapacity()
				)
			)
		);
	bool shouldDisable =
		process->enabled == Process::DISABLING
		&& process->sessions == 0
		&& enabledCount > 0;

	if (shouldDetach || shouldDisable) {
		vector<Callback> actions;

		if (shouldDetach) {
			if (detachingBecauseCapacityNeeded) {
				/* Someone might be trying to get() a session for a different
				 * group that couldn't be spawned because of lack of pool capacity.
				 * If this group isn't under sufficiently load (as apparent by the
				 * checked conditions) then now's a good time to detach
				 * this process or group in order to free capacity.
				 */
				P_DEBUG("Process " << process->inspect() << " is no longer totally "
					"busy; detaching it in order to make room in the pool");
			} else {
				/* This process has processed its maximum number of requests,
				 * so we detach it.
				 */
				P_DEBUG("Process " << process->inspect() <<
					" has reached its maximum number of requests (" <<
					options.maxRequests << "); detaching it");
			}
			pool->detachProcessUnlocked(process, actions);
		} else {
			removeProcessFromList(process, disablingProcesses);
			addProcessToList(process, disabledProcesses);
			removeFromDisableWaitlist(process, DR_SUCCESS, actions);
			maybeInitiateOobw(process);
		}
		
		pool->fullVerifyInvariants();
		lock.unlock();
		runAllActions(actions);

	} else {
		// This could change process->enabled.
		maybeInitiateOobw(process);

		if (!getWaitlist.empty() && process->enabled == Process::ENABLED) {
			/* If there are clients on this group waiting for a process to
			 * become available then call them now.
			 */
			UPDATE_TRACE_POINT();
			// Already calls verifyInvariants().
			assignSessionsToGetWaitersQuickly(lock);
		}
	}
}