/// Creates the TBB task scheduler for this Scheduler.
///
/// Must be called while the scheduler is still uninitialized (asserted).
/// The scheduler is created with one more thread than getNumThreads() reports.
///
/// @return always true.
bool Scheduler::initialize()
{
    assert(!_initialized);

    // NOTE(review): the original named this local `_numHardwareThreads`, which
    // looks like a member name; if such a member exists it is never assigned
    // here -- confirm whether that was intended.
    const uint_fast32_t hardwareThreads = getNumThreads();
    const uint_fast32_t schedulerThreads = hardwareThreads + 1;

    _taskSchedulerInit = new tbb::task_scheduler_init(schedulerThreads);
    _initialized = true;
    return true;
}
/// Builds the pool and immediately launches its worker threads.
///
/// @param numThreads forwarded to the ThreadPool base constructor.
///
/// One std::thread is started per index in [0, getNumThreads()); each runs
/// threadFunc with this pool and its index as arguments.
Garnet::ThreadPoolMultiple::ThreadPoolMultiple(int numThreads)
    : ThreadPool(numThreads)
{
    _isTerminated = false;

    // Launch the workers.
    int workerIndex = 0;
    while (workerIndex < getNumThreads()) {
        _threads.push_back(std::thread(threadFunc, this, workerIndex));
        ++workerIndex;
    }
}
void col2im(THClState *state, THClTensor* col, const int channels, const int height, const int width, const int patch_h, const int patch_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, THClTensor* im) { int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1; int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1; int num_kernels = channels * height * width; // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. EasyCL *cl = im->storage->cl; std::string uniqueName = "SpatialConvolutionMM::col2im"; CLKernel *kernel = 0; if(cl->kernelExists(uniqueName)) { kernel = cl->getKernel(uniqueName); } else { TemplatedKernel kernelBuilder(cl); kernel = kernelBuilder.buildKernel(uniqueName, "SpatialConvolutionMM.cl", getKernelTemplate(), "col2im_kernel"); } THClKernels k(state, kernel); k.in(num_kernels); k.in(col); k.in(height); k.in(width); k.in(channels); k.in(patch_h); k.in(patch_w); k.in(pad_h); k.in(pad_w); k.in(stride_h); k.in(stride_w); k.in(height_col); k.in(width_col); k.out(im); k.run(GET_BLOCKS(state, num_kernels), getNumThreads(state)); // col2im_kernel <<<GET_BLOCKS(num_kernels), CL_NUM_THREADS, 0, stream>>> ( // num_kernels, data_col, height, width, channels, // patch_h, patch_w, pad_h, pad_w, stride_h, stride_w, // height_col, width_col, data_im // ); // THError("Not implemented"); }
void im2col(THClState *state, THClTensor* im, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, THClTensor* col) { // We are going to launch channels * height_col * width_col kernels, each // kernel responsible for copying a single-channel grid. int height_col = (height + 2 * pad_h - ksize_h) / stride_h + 1; int width_col = (width + 2 * pad_w - ksize_w) / stride_w + 1; int num_kernels = channels * height_col * width_col; std::string uniqueName = "SpatialConvolutionMM::im2col"; EasyCL *cl = im->storage->cl; CLKernel *kernel = 0; if(cl->kernelExists(uniqueName)) { kernel = cl->getKernel(uniqueName); } else { TemplatedKernel kernelBuilder(cl); kernel = kernelBuilder.buildKernel(uniqueName, "SpatialConvolutionMM.cl", getKernelTemplate(), "im2col_kernel"); } THClKernels k(state, kernel); k.in(num_kernels); k.in(im); k.in(height); k.in(width); k.in(ksize_h); k.in(ksize_w); k.in(pad_h); k.in(pad_w); k.in(stride_h); k.in(stride_w); k.in(height_col); k.in(width_col); k.out(col); k.run(GET_BLOCKS(state, num_kernels), getNumThreads(state)); // Launch // im2col_kernel <<<GET_BLOCKS(num_kernels), CL_NUM_THREADS, 0, stream>>> ( // num_kernels, data_im, height, width, ksize_h, ksize_w, // pad_h, pad_w, stride_h, stride_w, // height_col, width_col, data_col // ); }
int _rsDataObjCopy( rsComm_t *rsComm, int destL1descInx, int existFlag, transferStat_t **transStat ) { dataObjInp_t *srcDataObjInp, *destDataObjInp; openedDataObjInp_t dataObjCloseInp; dataObjInfo_t *srcDataObjInfo, *destDataObjInfo; int srcL1descInx; int status = 0, status2; destDataObjInp = L1desc[destL1descInx].dataObjInp; destDataObjInfo = L1desc[destL1descInx].dataObjInfo; srcL1descInx = L1desc[destL1descInx].srcL1descInx; srcDataObjInp = L1desc[srcL1descInx].dataObjInp; srcDataObjInfo = L1desc[srcL1descInx].dataObjInfo; if ( destDataObjInp == NULL ) { // JMC cppcheck - null ptr ref rodsLog( LOG_ERROR, "_rsDataObjCopy: :: destDataObjInp is NULL" ); return -1; } if ( destDataObjInfo == NULL ) { // JMC cppcheck - null ptr ref rodsLog( LOG_ERROR, "_rsDataObjCopy: :: destDataObjInfo is NULL" ); return -1; } if ( srcDataObjInp == NULL ) { // JMC cppcheck - null ptr ref rodsLog( LOG_ERROR, "_rsDataObjCopy: :: srcDataObjInp is NULL" ); return -1; } if ( srcDataObjInfo == NULL ) { // JMC cppcheck - null ptr ref rodsLog( LOG_ERROR, "_rsDataObjCopy: :: srcDataObjInfo is NULL" ); return -1; } if ( L1desc[srcL1descInx].l3descInx <= 2 ) { /* no physical file was opened */ status = l3DataCopySingleBuf( rsComm, destL1descInx ); /* has not been registered yet because of NO_OPEN_FLAG_KW */ if ( status >= 0 && existFlag == 0 && destDataObjInfo->specColl == NULL && L1desc[destL1descInx].remoteZoneHost == NULL ) { /* If the dest is in remote zone, register in _rsDataObjClose there */ status = svrRegDataObj( rsComm, destDataObjInfo ); if ( status == CAT_UNKNOWN_COLLECTION ) { /* collection does not exist. 
make one */ char parColl[MAX_NAME_LEN], child[MAX_NAME_LEN]; splitPathByKey( destDataObjInfo->objPath, parColl, MAX_NAME_LEN, child, MAX_NAME_LEN, '/' ); status = svrRegDataObj( rsComm, destDataObjInfo ); rsMkCollR( rsComm, "/", parColl ); status = svrRegDataObj( rsComm, destDataObjInfo ); } if ( status < 0 ) { rodsLog( LOG_NOTICE, "_rsDataObjCopy: svrRegDataObj for %s failed, status = %d", destDataObjInfo->objPath, status ); return status; } } } else { if ( srcDataObjInfo != NULL ) { destDataObjInp->numThreads = getNumThreads( rsComm, srcDataObjInfo->dataSize, destDataObjInp->numThreads, NULL, destDataObjInfo->rescHier, srcDataObjInfo->rescHier, 0 ); } srcDataObjInp->numThreads = destDataObjInp->numThreads; status = dataObjCopy( rsComm, destL1descInx ); } memset( &dataObjCloseInp, 0, sizeof( dataObjCloseInp ) ); dataObjCloseInp.l1descInx = destL1descInx; if ( status >= 0 ) { *transStat = ( transferStat_t* )malloc( sizeof( transferStat_t ) ); memset( *transStat, 0, sizeof( transferStat_t ) ); ( *transStat )->bytesWritten = srcDataObjInfo->dataSize; ( *transStat )->numThreads = destDataObjInp->numThreads; dataObjCloseInp.bytesWritten = srcDataObjInfo->dataSize; } status2 = rsDataObjClose( rsComm, &dataObjCloseInp ); if ( status ) { return status; } return status2; }
//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ int main(int argc, char * argv[]) //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ { int i,l; subOptLev=-1; symRed=1; for (i=1;i<argc;i++) { if (argv[i][0]=='+')//all optimal solutions/suboptimal solutions { subOptLev=0; if (argv[i][1]>'0' && argv[i][1]<='9') subOptLev= argv[i][1]-48; } if (argv[i][0]=='-') { if (argv[i][1]=='s') symRed=0; } } printf("initializing memory.\n"); visitedA = (char *)calloc(NGOAL/8+1,1);//initialized to 0 by default visitedB = (char *)calloc(NGOAL/8+1,1); for (l=0;l<NTWIST;l++) movesCloserToTarget[l] = (short *)calloc(NFLIPSLICE*2,2); printf("initializing tables");fflush(stdout); initSymCubes(); initMoveCubes(); initInvSymIdx(); initSymIdxMultiply(); initMoveConjugate(); initMoveBitsConjugate(); initGESymmetries(); initTwistConjugate();pp(); initRawFLipSliceRep();pp(); initTwistMove();pp(); initCorn6PosMove();pp(); initEdge4PosMove();pp(); initEdge6PosMove();pp(); initSymFlipSliceClassMove();pp(); initMovesCloserToTarget();pp(); initNextMove();pp(); printf("\r\n"); #pragma omp parallel num_threads(NUM_GROUPS) private(cc_a,manString) { int ID = omp_get_thread_num(); printf("Hello from thread %d %d\n",ID,omp_get_thread_num()); int ext = 0; int cont = 0; #pragma omp barrier FILE *groupOut = NULL; char groupOutFileName[50]; sprintf(groupOutFileName,"OpenMPHybridDepth1Group%dout.txt",ID); groupOut = fopen(groupOutFileName,"w"); close(groupOut); omp_set_nested(1); while (1) { #pragma omp critical { printf("enter cube (x to exit): ");fflush(stdout); if (fgets(manString,sizeof(manString),stdin)==NULL) ext=1; if(ext==0){ if (manString[0]=='x') ext=1; gettimeofday(&start,NULL); l=strlen(manString); if (manString[l-1]=='\n') manString[l-1]=0;//remove LF if (l>1 && manString[l-2]=='\r') manString[l-2]=0;//remove CR if present if (strlen(manString)==0) cont=1; if(cont == 0){ printf("\nsolving optimal: %s\n",manString);fflush(stdout); } } } 
if(ext){break;} if(cont){continue;} cc_a = stringToCubieCube(manString); #ifdef _LINUX_ if (sigsetjmp(jump_buf, 1) == 0) { signal(SIGINT, user_break); solveOptimal(cc_a,ID); printf("Solved\n"); gettimeofday(&end,NULL); msecs = (end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 - start.tv_usec); printf("\nTime elapsed for %s: %f\nEND-OF-SOLVE\n",manString,1.0*msecs/1000000.0);fflush(stdout); } signal(SIGINT, SIG_IGN); #else solveOptimal(cc_a,ID); printf("Solved\n"); gettimeofday(&end,NULL); msecs = (end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 - start.tv_usec); printf("\nTime elapsed for %s: %f\nEND-OF-SOLVE\n",manString,1.0*msecs/1000000.0);fflush(stdout); #endif printf("Concatenating into one file\n"); int threadCounter = 0; FILE *groupOut = NULL; char groupOutFileName[50]; sprintf(groupOutFileName,"OpenMPHybridDepth1Group%dout.txt",ID); groupOut = fopen(groupOutFileName,"a"); FILE *tempIn = NULL; char inFileName[50]; char line[100]; printf("Entering loop\n"); for(threadCounter=0;threadCounter<NUM_THREADS;threadCounter++){ sprintf(inFileName,"OpenMPHybridDepth1Thread%dGroup%dOut.txt",threadCounter,ID); printf("%s\n",inFileName); tempIn = fopen(inFileName,"r"); while(fgets(line,sizeof line,tempIn) != NULL){ fprintf(groupOut,line); fflush(groupOut); } fclose (tempIn); fprintf(groupOut,"\n\n\n"); fflush(groupOut); } fprintf(groupOut,"\nTime elapsed for %s: %f\nEND-OF-SOLVE\n",manString,(1.0*msecs/1000000.0));fflush(groupOut); fclose(groupOut); } } printf("Writing to all out\n"); FILE *allOut = NULL; allOut = fopen("OpenMPHybridDepth1AllOut.txt","w"); FILE *tempIn = NULL; char inFileName[50]; char line[100]; int threadCounter; printf("Entering all out loop\n"); for(threadCounter=0;threadCounter<getNumThreads();threadCounter++){ sprintf(inFileName,"OpenMPHybridDepth1Group%dout.txt",threadCounter); tempIn = fopen(inFileName,"r"); while(fgets(line,sizeof line,tempIn) != NULL){ fprintf(allOut,line); fflush(allOut); } fclose (tempIn); 
fprintf(allOut,"\n\n\n"); fflush(allOut); } fclose(allOut); return 0; }
void testShutdownNonBlocking() { int myPID = OsProcess::getCurrentPID(); int startingThreads; // Stop TimerTask and NatAgentTask before counting threads. // Some tests do not bother stopping them, so they may come started. OsTimerTask::destroyTimerTask(); OsNatAgentTask::releaseInstance(); // Count number of threads now. startingThreads = getNumThreads(myPID); // Simple invite message from siptest/src/siptest/invite.txt const char* SimpleMessage = "INVITE sip:[email protected] SIP/2.0\r\n" "Route: <sip:[email protected]:5064;lr>\r\n" "From: <sip:[email protected];user=phone>;tag=bbb\r\n" "To: <sip:[email protected]:3000;user=phone>\r\n" "Call-Id: 8\r\n" "Cseq: 1 INVITE\r\n" "Content-Length: 0\r\n" "\r\n"; SipMessage testMsg( SimpleMessage, strlen( SimpleMessage ) ); for(int i = 0; i < SHUTDOWN_TEST_ITERATIONS; ++i) { // Limit life time of lineMgr and refreshMgr. They should be freed // before releasing OsNatAgentTask instance, or we will crash. { SipLineMgr lineMgr; SipRefreshMgr refreshMgr; lineMgr.StartLineMgr(); lineMgr.initializeRefreshMgr( &refreshMgr ); SipUserAgent sipUA( 5090 ,5090 ,5091 ,NULL // default publicAddress ,NULL // default defaultUser ,"127.0.0.1" // default defaultSipAddress ,NULL // default sipProxyServers ,NULL // default sipDirectoryServers ,NULL // default sipRegistryServers ,NULL // default authenticationScheme ,NULL // default authenicateRealm ,NULL // default authenticateDb ,NULL // default authorizeUserIds ,NULL // default authorizePasswords ,&lineMgr ); sipUA.start(); refreshMgr.init(&sipUA); sipUA.send(testMsg); // Wait long enough for some stack timeouts/retransmits to occur OsTask::delay(10000); // 10 seconds sipUA.shutdown(FALSE); lineMgr.requestShutdown(); refreshMgr.requestShutdown(); while(!sipUA.isShutdownDone()) { ; } CPPUNIT_ASSERT(sipUA.isShutdownDone()); } // Stop TimerTask and NatAgentTask again before counting threads. // They were started while testing. 
OsTimerTask::destroyTimerTask(); OsNatAgentTask::releaseInstance(); // Test to see that all the threads created by the above operations // get properly shut down. // Since the threads do not shut down synchronously with the above // calls, we have to wait before we know they will be cleared. OsTask::delay(1000); // 1 second int numThreads = getNumThreads(myPID); CPPUNIT_ASSERT_EQUAL(startingThreads,numThreads); } };
// CL: number of blocks for threads. inline int GET_BLOCKS(THClState *state, const int N) { return (N + getNumThreads(state) - 1) / getNumThreads(state); }
void testShutdownBlocking() { pid_t myPID = OsProcess::getCurrentPID(); int startingThreads = getNumThreads(myPID); // Simple invite message from siptest/src/siptest/invite.txt const char* SimpleMessage = "INVITE sip:[email protected] SIP/2.0\r\n" "Route: <sip:[email protected]:5064;lr>\r\n" "From: <sip:[email protected];user=phone>;tag=bbb\r\n" "To: <sip:[email protected]:3000;user=phone>\r\n" "Call-Id: 8\r\n" "Cseq: 1 INVITE\r\n" "Content-Length: 0\r\n" "\r\n"; SipMessage testMsg( SimpleMessage, strlen( SimpleMessage ) ); for(int i = 0; i < SHUTDOWN_TEST_ITERATIONS; ++i) { { SipLineMgr lineMgr; SipRefreshMgr refreshMgr; lineMgr.StartLineMgr(); lineMgr.initializeRefreshMgr( &refreshMgr ); SipUserAgent sipUA( 5090 ,5090 ,5091 ,NULL // default publicAddress ,NULL // default defaultUser ,"127.0.0.1" // default defaultSipAddress ,NULL // default sipProxyServers ,NULL // default sipDirectoryServers ,NULL // default sipRegistryServers ,NULL // default authenticationScheme ,NULL // default authenicateRealm ,NULL // default authenticateDb ,NULL // default authorizeUserIds ,NULL // default authorizePasswords ,&lineMgr ); sipUA.start(); refreshMgr.init(&sipUA); sipUA.send(testMsg); // Wait long enough for some stack timeouts/retansmits to occur OsTask::delay(10000); // 10 seconds // Shut down the tasks in reverse order. refreshMgr.requestShutdown(); sipUA.shutdown(TRUE); lineMgr.requestShutdown(); CPPUNIT_ASSERT(sipUA.isShutdownDone()); OsTimerTask::destroyTimerTask(); OsStunAgentTask::releaseInstance(); } // Test to see that all the threads created by the above operations // get properly shut down. int numThreads = getNumThreads(myPID); OsSysLog::add(FAC_SIP, PRI_NOTICE, "SipUserAgentTest::testShutdownBlocking " "numThreads=%d startingThreads=%d", numThreads, startingThreads); KNOWN_BUG("XECS-48", "Some threads are not cleaned up?"); CPPUNIT_ASSERT(numThreads <= startingThreads); } };
//Worker function. Start and end represent the #pragma line and the closing brace //Drives all the work for a parallel for void parallelForHelper(int start, int end) { //grab the necessary values from the #pragma line std::string pragmaString = input.at(start); strvec privVars = getConstructVars(pragmaString, "private"); strvec sharedVars = getConstructVars(pragmaString, "shared"); int numThreads = getNumThreads(pragmaString); //create a vector for the new pthreads void* function std::vector<std::string> newFunction; std::string newFuncName; std::string smallNewFuncName = std::string("func").append(std::to_string(currentFunction)); newFuncName.append("void* func").append(std::to_string(currentFunction)).append("(void* paramStruct)"); newFunction.push_back(newFuncName); currentFunction++; newFunction.push_back("{"); //now handle the case when a parallel for is inside a parallel. in that case, the parfor will not have priv/shared declared //it will instead use the priv/shared declared above //those are backed up in global vars. 
if (privVars.size() == 0 && sharedVars.size() == 0) //if we determine nothing is private, copy the global backup to the local strvecs { for (int i = 0; i < privVarBackup.size(); i++) { privVars.push_back(privVarBackup.at(i)); } for (int i = 0; i < sharedVarBackup.size(); i++) { sharedVars.push_back(sharedVarBackup.at(i)); } } //redeclare privates in the new function redeclareVars(privVars, newFunction); strvec privVarDeclarations; redeclareVars(privVars, privVarDeclarations); add(privVarDeclarations, totalPrivBackup); //globals will appear later redeclareVars(sharedVars, globalVars); //move the code to the new function //first modify the for loop to use the start and end from the new struct int numIterations = getNumIterations(input.at(start + 1)); input.at(start + 1) = fixForLine(input.at(start + 1)); //then copy the rest of the stuff for (int i = start + 1; i < end; i++) { newFunction.push_back(input.at(i)); } newFunction.push_back("}"); newFunction.push_back("}"); //delete the #pragma section input.erase(input.begin() + start, input.begin() + end + 1); //record the offset as start since we're going to be changing the size of the vector int newOffset = start; //set up the pthreads code //first we need an array of pthread_t threadids with size = numthreads std::string tempString = std::string("pthread_t threads[").append(std::to_string(numThreads)).append("];"); input.insert(input.begin() + newOffset++, tempString); //populate the threads[] array std::string loopVar = std::string("uniqueVar").append(std::to_string(uniqueVarNum)); uniqueVarNum++; tempString = std::string("for (int ").append(loopVar).append(" = 0; ").append(loopVar).append(" < ").append(std::to_string(numThreads)).append("; ").append(loopVar).append("++)"); input.insert(input.begin() + newOffset++, tempString); input.insert(input.begin() + newOffset++, "{"); tempString = std::string("threads[").append(loopVar).append("] = ").append(loopVar).append(";"); input.insert(input.begin() + newOffset++, 
tempString); input.insert(input.begin() + newOffset++, "}"); //create the pthreads code int uneven = numIterations - ((numIterations / numThreads) * numThreads); //figure out how many don't go in evenly int basicNum = numIterations / numThreads; //now distribute the iterations among the pthreads //example: 18 iterations among 4 threads //break it down as follows: //0-3 thread 1 //4-7 thread 2 //8-11 thread 3 //12-17 thread 4 //or 12-14 thread 4 int startIteration = 0; int endIteration = basicNum - 1; for (int i = 0; i < numThreads; i++) { tempString = std::string("StartEnd paramStruct").append(std::to_string(i)).append(";"); input.insert(input.begin() + newOffset++, tempString); tempString = std::string("paramStruct").append(std::to_string(i)).append(".start = ").append(std::to_string(startIteration)).append(";"); input.insert(input.begin() + newOffset++, tempString); startIteration += basicNum; if (i == numThreads - 1) //if we're on the last loop, give all remaining iterations to this thread. 
{ endIteration = numIterations - 1; //-1 to fix off by one error } tempString = std::string("paramStruct").append(std::to_string(i)).append(".end = ").append(std::to_string(endIteration + 1)).append(";"); //plus 1 since for loop is < input.insert(input.begin() + newOffset++, tempString); endIteration += basicNum; tempString = std::string("paramStruct").append(std::to_string(i)).append(".threadNum = ").append(std::to_string(i)).append(";"); input.insert(input.begin() + newOffset++, tempString); tempString = std::string("pthread_create(&threads[").append(std::to_string(i)).append("], NULL, ").append(smallNewFuncName).append(", (void*) ¶mStruct").append(std::to_string(i)).append(");"); input.insert(input.begin() + newOffset++, tempString); } //now join the threads for (int i = 0; i < numThreads; i++) { tempString = std::string("pthread_join(threads[").append(std::to_string(i)).append("], NULL);"); input.insert(input.begin() + newOffset++, tempString); } //insert the new stuff, in reverse order!! insertAfterIncludes(newFunction); //first, new function }
//Worker function. Start and end represent the #pragma line and the closing brace //Drives all the work for a parallel void parallelHelper(int start, int end) { //grab the necessary values from the #pragma line std::string pragmaString = input.at(start); strvec privVars = getConstructVars(pragmaString, "private"); strvec sharedVars = getConstructVars(pragmaString, "shared"); int numThreads = getNumThreads(pragmaString); //save the values in a backup for redeclaration in an inner function privVarBackup.clear(); for (int i = 0; i < privVars.size(); i++) { privVarBackup.push_back(privVars.at(i)); } sharedVarBackup.clear(); for (int i = 0; i < sharedVars.size(); i++) { sharedVarBackup.push_back(sharedVars.at(i)); } //create a vector for the new pthreads void* function std::vector<std::string> newFunction; //add to the new function std::string newFuncName; std::string smallNewFuncName = std::string("func").append(std::to_string(currentFunction)); newFuncName.append("void* func").append(std::to_string(currentFunction)).append("(void* paramStruct)"); newFunction.push_back(newFuncName); currentFunction++; newFunction.push_back("{"); //redeclare variables in the new function redeclareVars(privVars, newFunction); strvec privVarDeclarations; redeclareVars(privVars, privVarDeclarations); add(privVarDeclarations, totalPrivBackup); //stick shared vars in the global vector for declaration later redeclareVars(sharedVars, globalVars); //copy the function code as-is for (int i = start + 2; i < end; i++) //move start up to pass first bracket, < end to not include last bracket. 
{ newFunction.push_back(input.at(i)); } newFunction.push_back("}"); //delete the #pragma section input.erase(input.begin() + start, input.begin() + end + 1); //record the offset as start since we're going to be changing the size of the vector int newOffset = start; //set up the pthreads code //first we need an array of pthread_t threadids with size = numthreads std::string tempString = std::string("pthread_t threads[").append(std::to_string(numThreads)).append("];"); //TODO do we need to populate this? input.insert(input.begin() + newOffset++, tempString); //populate the threads[] array std::string loopVar = std::string("uniqueVar").append(std::to_string(uniqueVarNum)); uniqueVarNum++; tempString = std::string("for (int ").append(loopVar).append(" = 0; ").append(loopVar).append(" < ").append(std::to_string(numThreads)).append("; ").append(loopVar).append("++)"); input.insert(input.begin() + newOffset++, tempString); input.insert(input.begin() + newOffset++, "{"); tempString = std::string("threads[").append(loopVar).append("] = ").append(loopVar).append(";"); input.insert(input.begin() + newOffset++, tempString); input.insert(input.begin() + newOffset++, "}"); //now create the pthreads for (int i = 0; i < numThreads; i++) { tempString = std::string("StartEnd paramStruct").append(std::to_string(i)).append(";"); input.insert(input.begin() + newOffset++, tempString); tempString = std::string("paramStruct").append(std::to_string(i)).append(".threadNum = ").append(std::to_string(i)).append(";"); input.insert(input.begin() + newOffset++, tempString); tempString = std::string("pthread_create(&threads[").append(std::to_string(i)).append("], NULL, ").append(smallNewFuncName).append(", (void*) ¶mStruct").append(std::to_string(i)).append(");"); input.insert(input.begin() + newOffset++, tempString); } //now set up a for loop to join the pthreads loopVar = std::string("uniqueVar").append(std::to_string(uniqueVarNum)); uniqueVarNum++; tempString = std::string("for (int 
").append(loopVar).append(" = 0; ").append(loopVar).append(" < ").append(std::to_string(numThreads)).append("; ").append(loopVar).append("++)"); input.insert(input.begin() + newOffset++, tempString); input.insert(input.begin() + newOffset++, "{"); tempString = std::string("pthread_join(threads[").append(loopVar).append("], NULL);"); input.insert(input.begin() + newOffset++, tempString); input.insert(input.begin() + newOffset++, "}"); //insert the new stuff, in reverse order!! insertAfterIncludes(newFunction); //first, new function }