void ThreadPool::submitBarrier() { //Promises are not copyable, so we save a pointer and delete it later, after the barrier. auto promise = new std::promise<void>(); std::shared_future<void> future(promise->get_future()); auto counter = new std::atomic<int>(); counter->store(0); int goal = thread_count; auto barrierJob = [counter, promise, future, goal] () { int currentCounter = counter->fetch_add(1); //increment by one and get the current counter. if(currentCounter == goal-1) { //we're finished. promise->set_value(); delete promise; //we're done, this isn't needed anymore. //We got here, so we're the last one to manipulate the counter. delete counter; } else { //Otherwise, we wait for all the other barriers. future.wait(); } }; for(int i = 0; i < thread_count; i++) submitJob(barrierJob); }
/**
 * Submits a job for the given node unless the node is already busy.
 *
 * The node is rejected (and the reason is written to stderr and appended to
 * the node's comment) when the BFS handler reports that it is running a job
 * or that it is flagged. Otherwise the job command is prepared by the game
 * handler, submitted, recorded in the node's comment and -- when job logging
 * is enabled in the configuration -- appended to the job log file. The
 * doing/total job counters are incremented on a successful dispatch.
 *
 * @param pNode node to dispatch a job for.
 * @return false if the node was rejected, true once the job was submitted.
 */
bool CommonBfJlModules::dispatch(NodePtr pNode)
{
    assert(m_pBfsHandler != nullptr);
    assert(m_pGameHandler != nullptr);

    // Common rejection path: echo the reason to stderr followed by the
    // "> " prompt, attach it to the node, and report failure.
    auto reject = [&pNode](const char* reason) {
        std::ostringstream errorText;
        errorText << reason << std::endl;
        std::cerr << errorText.str() << "> " << std::flush;
        pNode->appendComment(errorText.str());
        return false;
    };

    if (m_pBfsHandler->isRunningJob(pNode)) {
        return reject("Error : the node to dispathch is running job");
    }
    if (m_pBfsHandler->isFlagged(pNode)) {
        return reject("Error : the node to dispatch is flagged");
    }

    // Gather the job description in the same order as before: name, version,
    // then the command arguments for this node.
    std::string appName = m_pGameHandler->getAppName();
    std::string appVersion = m_pGameHandler->getAppVersion();
    std::string jobArguments = m_pGameHandler->prepareJobCommands(pNode);

    int jobId = submitJob(pNode, appName, appVersion, jobArguments);

    std::ostringstream submitRecord;
    submitRecord << "Submit job ID " << jobId << ": " << jobArguments << std::endl;
    pNode->appendComment(submitRecord.str());

    if (JobLevelConfigure::g_configure.bUseJobLog) {
        // Opened in append mode; the stream is closed when it leaves scope.
        std::ofstream jobLog(JobLevelConfigure::g_configure.sJobLogFile.c_str(), std::ios::app);
        jobLog << submitRecord.str();
    }

    ++m_nDoingJobs;
    ++m_nTotalJobs;
    return true;
}
/* * This file represents the client half of the JCEP communication. It * operates in much the same way as the RMI skeleton expect for two important * points: * 1) It doesn't use JRMP. Instead it uses JCEP. JCEP is a much simpler and * more expressive protocol for this specific task. This means that all the * complicated unwinding of serialized Java objects that the RMI skeleton had * are no longer necesasary. This JCEP skeleton does not have to understand * one lick of Java. * 2) It is a two-way communication. The RMI skeleton just feed a job to the * server and read the response, which either said it was received or not. It * did not continue running for the duration of the job. That means that SGE * couldn't track the job past its submission. That also meant that problems * with the job that came after execution started had to be passed back to the * ComputeProxy is be logged. Now the skeleton runs through the entire * duration of the job. Any messages or errors generated as the job runs are * sent to the skeleton, which will print them out to stdout or stderr so that * SGE can log them. That means that all information from the running of the * job is in the same SGE-generated files. It also means that the job continues * to be trackable by SGE throughout its duration. This two-way communication * also means that the JCEP skeleton has to be multi-threaded. * * Only one JCEPskeleton may be allowed to attach to a given job. The reason is * that a failed command for one will terminate them all. See the comments on * listenerThread() about line 600 for more info. 
*/ int main (int argc, char** argv) { int serverPort, threadStatus, code; char serverIP[32]; pthread_t threadId, sigThreadId; //Process arguments processArgs (argc, argv); //Set the buffering on STDOUT to line buffering setlinebuf (stdout); //Connect to server if (debug) { printf ("**DEBUG: Beginning skeleton...\n"); printf ("**DEBUG: Creating socket to JCEP server...\n"); } // Open a socket to RMI registry if (createConnection ("127.0.0.1", PORT) < 0) { fprintf (stderr, "Unable to establish connection to JCEP server:\n\t%s\n", errorMessage); exit (1); } if (debug) { printf ("**DEBUG: Connection established.\n"); } /* The order of the following events is purposeful. The signal thread must * start before the listener thread so that the listener thread can inherit * the signal mask set in startSignalThread(). The signal thread has to * start after the command is sent because otherwise there could be * synchronization problems with the main thread and the signal thread both * trying to send a command to the server at the same time. Starting the * signal thread after the command does leave a small hole, where if SGE * tried to cancel or suspend the job immediately after starting it, the * signal could come before the signal thread is ready. This should be fixed * in the future with some explicit synchronization. */ if (taskType == SUBMIT_JOB) { /* Send job to server */ submitJob (); } else if (taskType == SHUTDOWN) { shutdownServer (); } else if (taskType == CHECKPOINT_JOB) { checkpointJob (jobId); } else if (taskType == CANCEL_JOB) { cancelJob (jobId); } else if (taskType == REGISTER) { listenToJob (jobId); } else if (taskType == SUSPEND) { suspendJob (jobId); } else if (taskType == RESUME) { resumeJob (jobId); } /* Start up a thread to handle SIGUSR1 (pending suspend), SIGUSR2 (pending * cancel), and SIGCONT (just resumed). 
*/ if ((code = startSignalThread (&sigThreadId)) != 0) { fprintf (stderr, "Unable to start signal thread: error code %d.\n", code); exit (1); } /* Start listener thread */ if ((code = startListenerThread (&threadId)) != 0) { fprintf (stderr, "Unable to start listener thread: error code %d.\n", code); exit (1); } /* Wait for a job finished notice */ pthread_join (threadId, (void**)&threadStatus); if (debug) { printf ("**DEBUG: Listener thread has exited.\n"); } if (threadStatus < 0) { fprintf (stderr, "Listener thread received the following error:\n\t%s\n", errorMessage); exit (1); } /* Exit upon receipt of job finished notice */ return (EXIT_SUCCESS); }