Ejemplo n.º 1
0
//Submits thread_count copies of a rendezvous job: each copy bumps a shared counter and then blocks
//until the last copy to arrive releases all of them through a shared future.
//NOTE(review): this presumably relies on thread_count being the number of worker threads, so that
//every copy runs on its own thread; if fewer threads pick the jobs up, the waiting copies would
//block the ones that still have to arrive - confirm against the pool implementation.
void ThreadPool::submitBarrier() {
	const int goal = thread_count;
	//With nothing to submit, no job would ever run and free the shared state below,
	//so bail out before allocating it instead of leaking it.
	if(goal <= 0) return;
	//Promises are not copyable, so the jobs share a raw pointer; the last job to arrive deletes it.
	auto promise = new std::promise<void>();
	std::shared_future<void> future(promise->get_future());
	auto counter = new std::atomic<int>(0);
	auto barrierJob = [counter, promise, future, goal] () {
		int currentCounter = counter->fetch_add(1); //increment by one and get the previous value.
		if(currentCounter == goal-1) { //we're the last one to arrive.
			//Release everybody blocked in future.wait(). The futures keep the shared state alive,
			//so the promise object itself can go right away.
			promise->set_value();
			delete promise;
			//Every other job has already done its fetch_add (otherwise we would not be last),
			//so nobody touches the counter after this point.
			delete counter;
		}
		else {
			//Otherwise, we wait for all the other barrier jobs.
			future.wait();
		}
	};
	//Submit exactly as many jobs as the goal the jobs compare against.
	for(int i = 0; i < goal; i++) submitJob(barrierJob);
}
/**
 * Submits a job for the given node unless the BFS handler reports the node
 * as already running a job or as flagged.
 *
 * On refusal the reason is printed to stderr (followed by a "> " prompt) and
 * appended to the node's comment. On success the submission line is appended
 * to the node's comment, optionally appended to the configured job log file,
 * and both job counters are incremented.
 *
 * @param pNode node to dispatch.
 * @return false if the node was refused, true once the job was submitted.
 */
bool CommonBfJlModules::dispatch(NodePtr pNode)
{
	assert(m_pBfsHandler != nullptr);
	assert(m_pGameHandler != nullptr);

	// Shared refusal path: report on stderr, record on the node, yield false.
	auto refuse = [&pNode](const char* szReason) {
		std::ostringstream ossError;
		ossError << szReason << std::endl;
		std::cerr << ossError.str() << "> " << std::flush;
		pNode->appendComment(ossError.str());
		return false;
	};

	if (m_pBfsHandler->isRunningJob(pNode)) {
		return refuse("Error : the node to dispathch is running job");
	}
	if (m_pBfsHandler->isFlagged(pNode)) {
		return refuse("Error : the node to dispatch is flagged");
	}

	// Query the game handler in a fixed order: name, version, then commands.
	std::string sAppName = m_pGameHandler->getAppName();
	std::string sAppVersion = m_pGameHandler->getAppVersion();
	std::string sArgument = m_pGameHandler->prepareJobCommands(pNode);

	int iJobId = submitJob(pNode, sAppName, sAppVersion, sArgument);

	// One line describing the submission, used for both the node and the log.
	std::ostringstream ossSubmit;
	ossSubmit << "Submit job ID " << iJobId << ": " << sArgument << std::endl;
	pNode->appendComment(ossSubmit.str());

	if (JobLevelConfigure::g_configure.bUseJobLog) {
		// Opened in append mode; closed when the stream leaves this scope.
		std::ofstream logFile(JobLevelConfigure::g_configure.sJobLogFile.c_str(), std::ios::app);
		logFile << ossSubmit.str();
	}

	++m_nDoingJobs;
	++m_nTotalJobs;
	return true;
}
Ejemplo n.º 3
0
/*
 * This file represents the client half of the JCEP communication.  It
 * operates in much the same way as the RMI skeleton except for two important
 * points:
 * 1) It doesn't use JRMP.  Instead it uses JCEP.  JCEP is a much simpler and
 * more expressive protocol for this specific task.  This means that all the
 * complicated unwinding of serialized Java objects that the RMI skeleton had
 * to do is no longer necessary.  This JCEP skeleton does not have to
 * understand one lick of Java.
 * 2) It is a two-way communication.  The RMI skeleton just fed a job to the
 * server and read the response, which either said it was received or not.  It
 * did not continue running for the duration of the job.  That meant that SGE
 * couldn't track the job past its submission.  That also meant that problems
 * with the job that came after execution started had to be passed back to the
 * ComputeProxy to be logged.  Now the skeleton runs through the entire
 * duration of the job.  Any messages or errors generated as the job runs are
 * sent to the skeleton, which will print them out to stdout or stderr so that
 * SGE can log them.  That means that all information from the running of the
 * job is in the same SGE-generated files.  It also means that the job continues
 * to be trackable by SGE throughout its duration.  This two-way communication
 * also means that the JCEP skeleton has to be multi-threaded.
 *
 * Only one JCEPskeleton may be allowed to attach to a given job.  The reason is
 * that a failed command for one will terminate them all.  See the comments on
 * listenerThread() about line 600 for more info.
 */
/* Entry point of the skeleton: parses the arguments, connects to the JCEP
 * server, sends the command selected by taskType, then starts the signal and
 * listener threads and waits for the listener thread to finish.
 *
 * Returns EXIT_SUCCESS when the listener thread ends with a non-negative
 * status; every failure path prints a message to stderr and exits with 1.
 * NOTE(review): taskType, jobId and debug are presumably filled in by
 * processArgs() -- confirm. */
int main (int argc, char** argv) {
	int threadStatus, code;
	void* threadResult = NULL;
	pthread_t threadId, sigThreadId;
	
	//Process arguments
	processArgs (argc, argv);
	
	//Set the buffering on STDOUT to line buffering
	setlinebuf (stdout);
	
	//Connect to server
	if (debug) {
		printf ("**DEBUG: Beginning skeleton...\n");
		printf ("**DEBUG: Creating socket to JCEP server...\n");
	}
	
	// Open a socket to the JCEP server
	if (createConnection ("127.0.0.1", PORT) < 0) {
		fprintf (stderr, "Unable to establish connection to JCEP server:\n\t%s\n", errorMessage);
		exit (1);
	}
	
	if (debug) {
		printf ("**DEBUG: Connection established.\n");
	}
	
	/* The order of the following events is purposeful.  The signal thread must
	 * start before the listener thread so that the listener thread can inherit
	 * the signal mask set in startSignalThread().  The signal thread has to
	 * start after the command is sent because otherwise there could be
	 * synchronization problems with the main thread and the signal thread both
	 * trying to send a command to the server at the same time.  Starting the
	 * signal thread after the command does leave a small hole, where if SGE
	 * tried to cancel or suspend the job immediately after starting it, the
	 * signal could come before the signal thread is ready.  This should be fixed
	 * in the future with some explicit synchronization. */
	if (taskType == SUBMIT_JOB) {
		/* Send job to server */
		submitJob ();
	}
	else if (taskType == SHUTDOWN) {
		shutdownServer ();
	}
	else if (taskType == CHECKPOINT_JOB) {
		checkpointJob (jobId);
	}
	else if (taskType == CANCEL_JOB) {
		cancelJob (jobId);
	}
	else if (taskType == REGISTER) {
		listenToJob (jobId);
	}
	else if (taskType == SUSPEND) {
		suspendJob (jobId);
	}
	else if (taskType == RESUME) {
		resumeJob (jobId);
	}
	
	/* Start up a thread to handle SIGUSR1 (pending suspend), SIGUSR2 (pending
	 * cancel), and SIGCONT (just resumed). */
	if ((code = startSignalThread (&sigThreadId)) != 0) {
		fprintf (stderr, "Unable to start signal thread: error code %d.\n", code);
		exit (1);
	}
	
	/* Start listener thread */
	if ((code = startListenerThread (&threadId)) != 0) {
		fprintf (stderr, "Unable to start listener thread: error code %d.\n", code);
		exit (1);
	}
	
	/* Wait for a job finished notice.  pthread_join() stores a full void*, so
	 * it must be given a real void* to write into: pointing it at an int would
	 * overwrite the neighbouring stack bytes wherever pointers are wider than
	 * int. */
	if ((code = pthread_join (threadId, &threadResult)) != 0) {
		fprintf (stderr, "Unable to join listener thread: error code %d.\n", code);
		exit (1);
	}
	
	/* The listener thread hands back its status code packed into the pointer
	 * value; go through long (pointer-sized on the usual pthread platforms) to
	 * narrow it back to an int. */
	threadStatus = (int) (long) threadResult;
	
	if (debug) {
		printf ("**DEBUG: Listener thread has exited.\n");
	}
	
	if (threadStatus < 0) {
		fprintf (stderr, "Listener thread received the following error:\n\t%s\n", errorMessage);
		exit (1);
	}
	
	/* Exit upon receipt of job finished notice */
	return (EXIT_SUCCESS);
}