/**
 * Computes the weighted values for every pairwise combination of groups.
 *
 * Builds the list of group pairs (for groups A,B,C the pairs are BA, CA, CB), splits that list
 * into one (startIndex, numPairs) range per processor in the member `lines`, and hands the
 * work to createProcesses(). `lines` is emptied again before returning.
 *
 * @param t  tree to score; its count table is passed on to createProcesses()
 * @param p  number of processors to split the comparisons across (stored in `processors`)
 * @param o  output directory (stored in `outputDir`)
 * @return   the result of createProcesses(), or the cleared `data` if control_pressed was set
 */
EstOutput Weighted::getValues(Tree* t, int p, string o) {
    try {
        data.clear(); //clear out old values

        processors = p;
        outputDir = o;

        CountTable* ct = t->getCountTable();
        int numGroups = m->getNumGroups();

        if (m->control_pressed) { return data; }

        //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
        //fetch the group names once instead of twice per pair
        const vector<string>& groupNames = m->getGroups();
        vector< vector<string> > namesOfGroupCombos;
        for (int i = 0; i < numGroups; i++) {
            for (int l = 0; l < i; l++) {
                vector<string> groups;
                groups.push_back(groupNames[i]);
                groups.push_back(groupNames[l]);
                namesOfGroupCombos.push_back(groups);
            }
        }

        //hand each processor a contiguous run of pairs
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) {
                //integer ceiling division: the previous ceil(a / b) truncated before ceil() ran, so it never rounded up
                numPairs = (remainingPairs + remainingProcessors - 1) / remainingProcessors;
            }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Weighted", "getValues");
        exit(1);
    }
}
int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vector<int>& averageQ, vector< vector<int> >& scores, string filename) { try { int process = 1; int numSeqs = 0; processIDS.clear(); bool recalc = false; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[process]); //pass numSeqs to parent ofstream out; string tempFile = qualfile + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << numSeqs << endl; out << position.size() << endl; for (int k = 0; k < position.size(); k++) { out << position[k] << '\t'; } out << endl; for (int k = 0; k < averageQ.size(); k++) { out << averageQ[k] << '\t'; } out << endl; for (int k = 0; k < scores.size(); k++) { for (int j = 0; j < 41; j++) { out << scores[k][j] << '\t'; } out << endl; } out << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process; for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } //wait to die for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for (int i=0;i<processIDS.size();i++) { m->mothurRemove(qualfile + (toString(processIDS[i]) + ".num.temp")); } recalc = true; break; } } if (recalc) { //test line, also set recalc to true. 
//for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for (int i=0;i<processIDS.size();i++) {m->mothurRemove(qualfile + (toString(processIDS[i]) + ".num.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n"); //redo file divide lines.clear(); vector<unsigned long long> positions = m->divideFile(qualfile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } numSeqs = 0; processIDS.resize(0); process = 1; position.clear(); averageQ.clear(); scores.clear(); //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[process]); //pass numSeqs to parent ofstream out; string tempFile = qualfile + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << numSeqs << endl; out << position.size() << endl; for (int k = 0; k < position.size(); k++) { out << position[k] << '\t'; } out << endl; for (int k = 0; k < averageQ.size(); k++) { out << averageQ[k] << '\t'; } out << endl; for (int k = 0; k < scores.size(); k++) { for (int j = 0; j < 41; j++) { out << scores[k][j] << '\t'; } out << endl; } out << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } } //do your part numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]); //force parent to wait until all the processes are done for (int 
i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } //parent reads in and combine Filter info for (int i = 0; i < processIDS.size(); i++) { string tempFilename = qualfile + toString(processIDS[i]) + ".num.temp"; ifstream in; m->openInputFile(tempFilename, in); int temp, tempNum; in >> tempNum; m->gobble(in); numSeqs += tempNum; in >> tempNum; m->gobble(in); if (position.size() < tempNum) { position.resize(tempNum, 0); } if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); } if (scores.size() < tempNum) { scores.resize(tempNum); for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); } } for (int k = 0; k < tempNum; k++) { in >> temp; position[k] += temp; } m->gobble(in); for (int k = 0; k < tempNum; k++) { in >> temp; averageQ[k] += temp; } m->gobble(in); for (int k = 0; k < tempNum; k++) { for (int j = 0; j < 41; j++) { in >> temp; scores[k][j] += temp; m->gobble(in); } } in.close(); m->mothurRemove(tempFilename); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the seqSumQualData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, //Taking advantage of shared memory to pass results vectors. ////////////////////////////////////////////////////////////////////////////////////////////////////// vector<seqSumQualData*> pDataArray; DWORD dwThreadIdArray[processors]; HANDLE hThreadArray[processors]; bool hasNameMap = false; if ((namefile !="") || (countfile != "")) { hasNameMap = true; } //Create processor worker threads. for( int i=0; i<processors; i++ ){ // Allocate memory for thread data. 
seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, hasNameMap, nameMap); pDataArray.push_back(tempSum); processIDS.push_back(i); hThreadArray[i] = CreateThread(NULL, 0, MySeqSumQualThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); } //Wait until all threads have terminated. WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ numSeqs += pDataArray[i]->numSeqs; if (pDataArray[i]->count != pDataArray[i]->end) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; } int tempNum = pDataArray[i]->position.size(); if (position.size() < tempNum) { position.resize(tempNum, 0); } if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); } if (scores.size() < tempNum) { scores.resize(tempNum); for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); } } for (int k = 0; k < tempNum; k++) { position[k] += pDataArray[i]->position[k]; } for (int k = 0; k < tempNum; k++) { averageQ[k] += pDataArray[i]->averageQ[k]; } for (int k = 0; k < tempNum; k++) { for (int j = 0; j < 41; j++) { scores[k][j] += pDataArray[i]->scores[k][j]; } } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif return numSeqs; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "createProcessesCreateSummary"); exit(1); } }
//*************************************************************************************************************** int SummaryQualCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); int numSeqs = 0; vector<int> position; vector<int> averageQ; vector< vector<int> > scores; if (m->control_pressed) { return 0; } if (namefile != "") { nameMap = m->readNames(namefile); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); nameMap = ct.getNameMap(); } vector<unsigned long long> positions; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(qualfile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(linePair(0, 1000)); }else { positions = m->setFilePosFasta(qualfile, numSeqs); if (numSeqs < processors) { processors = numSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif if(processors == 1){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]); } else{ numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile); } if (m->control_pressed) { return 0; } //print summary file map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qualfile)); string summaryFile = getOutputFileName("summary",variables); printQual(summaryFile, position, averageQ, scores); if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; } //output results to screen cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); 
m->mothurOutEndLine(); m->mothurOut("Position\tNumSeqs\tAverageQ"); m->mothurOutEndLine(); for (int i = 0; i < position.size(); i+=100) { float average = averageQ[i] / (float) position[i]; cout << i << '\t' << position[i] << '\t' << average; m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)); m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(summaryFile); m->mothurOutEndLine(); outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int SffMultipleCommand::createProcesses(vector<string> sffFiles, vector<string> oligosFiles, string fasta, string name, string group){ try { vector<int> processIDS; int process = 1; int num = 0; //divide the groups between the processors vector<linePair> lines; vector<int> numFilesToComplete; int numFilesPerProcessor = sffFiles.size() / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numFilesPerProcessor; int endIndex = (i+1) * numFilesPerProcessor; if(i == (processors - 1)){ endIndex = sffFiles.size(); } lines.push_back(linePair(startIndex, endIndex)); numFilesToComplete.push_back((endIndex-startIndex)); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + m->mothurGetpid(process) + ".temp", name + m->mothurGetpid(process) + ".temp", group + m->mothurGetpid(process) + ".temp"); //pass numSeqs to parent ofstream out; string tempFile = m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << '\t' << outputNames.size() << endl; for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; } out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } //do my part num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end, fasta, name, group); //force parent to wait until all the processes are done for (int 
i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } for (int i=0;i<processIDS.size();i++) { ifstream in; string tempFile = toString(processIDS[i]) + ".num.temp"; m->openInputFile(tempFile, in); if (!in.eof()) { int tempNum = 0; int outputNamesSize = 0; in >> tempNum >> outputNamesSize; m->gobble(in); for (int j = 0; j < outputNamesSize; j++) { string tempName; in >> tempName; m->gobble(in); outputNames.push_back(tempName); } if (tempNum != numFilesToComplete[i+1]) { m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n"); } } in.close(); m->mothurRemove(tempFile); if (append) { m->appendFiles(fasta+toString(processIDS[i])+".temp", fasta); m->mothurRemove(fasta+toString(processIDS[i])+".temp"); m->appendFiles(name+toString(processIDS[i])+".temp", name); m->mothurRemove(name+toString(processIDS[i])+".temp"); if (makeGroup) { m->appendFiles(group+toString(processIDS[i])+".temp", group); m->mothurRemove(group+toString(processIDS[i])+".temp"); } } } #endif return 0; }
int MetaStatsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //just used to convert files to test metastats online /****************************************************/ bool convertInputToShared = false; convertSharedToInput = false; if (convertInputToShared) { convertToShared(sharedfile); return 0; } /****************************************************/ designMap = new GroupMap(designfile); designMap->readDesignMap(); input = new InputData(sharedfile, "sharedfile"); lookup = input->getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; //setup the pairwise comparions of sets for metastats //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; //make sure sets are all in designMap SharedUtil* util = new SharedUtil(); vector<string> dGroups = designMap->getNamesOfGroups(); util->setGroups(Sets, dGroups); delete util; int numGroups = Sets.size(); for (int a=0; a<numGroups; a++) { for (int l = 0; l < a; l++) { vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]); namesOfGroupCombos.push_back(groups); } } //only 1 combo if (numGroups == 2) { processors = 1; } else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. 
Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } if(processors != 1){ int remainingPairs = namesOfGroupCombos.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } } //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); } if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); //restore real lastlabel to save below lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) 
{ m->mothurRemove(outputNames[i]); } return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } //reset groups parameter m->clearGroups(); delete input; delete designMap; if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "execute"); exit(1); } }
int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) { try { vector<int> processIDS; int process = 1; int num = 0; //sanity check if (groups.size() < processors) { processors = groups.size(); } //divide the groups between the processors vector<linePair> lines; int numGroupsPerProcessor = groups.size() / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numGroupsPerProcessor; int endIndex = (i+1) * numGroupsPerProcessor; if(i == (processors - 1)){ endIndex = groups.size(); } lines.push_back(linePair(startIndex, endIndex)); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //loop through and create all the processes you want while (process != processors) { int pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; string tempFile = outputFName + toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } //do my part num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } for (int i = 0; i < processIDS.size(); i++) { ifstream in; string tempFile = outputFName + toString(processIDS[i]) + ".num.temp"; m->openInputFile(tempFile, in); if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } 
in.close(); m->mothurRemove(tempFile); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the preClusterData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, ////////////////////////////////////////////////////////////////////////////////////////////////////// vector<perseusData*> pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; //Create processor worker threads. for( int i=1; i<processors; i++ ){ // Allocate memory for thread data. string extension = toString(i) + ".temp"; perseusData* tempPerseus = new perseusData(alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i); pDataArray.push_back(tempPerseus); processIDS.push_back(i); //MyPerseusThreadFunction is in header. It must be global or static to work with the threads. //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); } //using the main process as a worker saves time and memory num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. 
for(int i=0; i < pDataArray.size(); i++){ num += pDataArray[i]->count; CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif //append output files for(int i=0;i<processIDS.size();i++){ m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName); m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp")); m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos); m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); } return num; }
int PreClusterCommand::createProcessesGroups(string newFName, string newNName, string newMFile, vector<string> groups) { try { vector<int> processIDS; int process = 1; int num = 0; bool recalc = false; //sanity check if (groups.size() < processors) { processors = groups.size(); } //divide the groups between the processors vector<linePair> lines; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ outputNames.clear(); num = driverGroups(newFName + m->mothurGetpid(process) + ".temp", newNName + m->mothurGetpid(process) + ".temp", newMFile, lines[process].start, lines[process].end, groups); string tempFile = m->mothurGetpid(process) + ".outputNames.temp"; ofstream outTemp; m->openOutputFile(tempFile, outTemp); outTemp << outputNames.size(); for (int i = 0; i < outputNames.size(); i++) { outTemp << outputNames[i] << endl; } outTemp.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process; for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } //wait to die for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for 
(int i=0;i<processIDS.size();i++) { m->mothurRemove((toString(processIDS[i]) + ".outputNames.temp")); } recalc = true; break; } } if (recalc) { //test line, also set recalc to true. //for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for (int i=0;i<processIDS.size();i++) {m->mothurRemove((toString(processIDS[i]) + ".outputNames.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n"); lines.clear(); num = 0; processIDS.resize(0); process = 1; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ outputNames.clear(); num = driverGroups(newFName + m->mothurGetpid(process) + ".temp", newNName + m->mothurGetpid(process) + ".temp", newMFile, lines[process].start, lines[process].end, groups); string tempFile = m->mothurGetpid(process) + ".outputNames.temp"; ofstream outTemp; m->openOutputFile(tempFile, outTemp); outTemp << outputNames.size(); for (int i = 0; i < outputNames.size(); i++) { outTemp << outputNames[i] << endl; } outTemp.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } 
} //do my part num = driverGroups(newFName, newNName, newMFile, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } for (int i = 0; i < processIDS.size(); i++) { string tempFile = toString(processIDS[i]) + ".outputNames.temp"; ifstream intemp; m->openInputFile(tempFile, intemp); int num; intemp >> num; for (int k = 0; k < num; k++) { string name = ""; intemp >> name; m->gobble(intemp); outputNames.push_back(name); outputTypes["map"].push_back(name); } intemp.close(); m->mothurRemove(tempFile); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the preClusterData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, ////////////////////////////////////////////////////////////////////////////////////////////////////// vector<preClusterData*> pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; //Create processor worker threads. for( int i=1; i<processors; i++ ){ // Allocate memory for thread data. string extension = toString(i) + ".temp"; preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, topdown, i, method, align, match, misMatch, gapOpen, gapExtend); pDataArray.push_back(tempPreCluster); processIDS.push_back(i); //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. 
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier hThreadArray[i-1] = CreateThread(NULL, 0, MyPreclusterThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); } //using the main process as a worker saves time and memory num = driverGroups(newFName, newNName, newMFile, lines[0].start, lines[0].end, groups); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; } for (int j = 0; j < pDataArray[i]->mapFileNames.size(); j++) { outputNames.push_back(pDataArray[i]->mapFileNames[j]); outputTypes["map"].push_back(pDataArray[i]->mapFileNames[j]); } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif //append output files for(int i=0;i<processIDS.size();i++){ //newFName = m->getFullPathName(".\\" + newFName); //newNName = m->getFullPathName(".\\" + newNName); m->appendFiles((newFName + toString(processIDS[i]) + ".temp"), newFName); m->mothurRemove((newFName + toString(processIDS[i]) + ".temp")); m->appendFiles((newNName + toString(processIDS[i]) + ".temp"), newNName); m->mothurRemove((newNName + toString(processIDS[i]) + ".temp")); } return num; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "createProcessesGroups"); exit(1); } }
//*************************************************************************************************************** long long DegapSeqsCommand::createProcesses(string filename, string outputFileName){ try{ //create array of worker threads vector<thread*> workerThreads; vector<degapData*> data; vector<linePair> lines; long long num = 0; vector<unsigned long long> positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(linePair(0, -1)); }//forces it to read whole file else { positions = util.setFilePosFasta(filename, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif auto synchronizedFile = std::make_shared<SynchronizedOutputFile>(outputFileName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadWriter = new OutputWriter(synchronizedFile); degapData* dataBundle = new degapData(filename, lines[i+1].start, lines[i+1].end, threadWriter); data.push_back(dataBundle); workerThreads.push_back(new thread(driverDegap, dataBundle)); } OutputWriter* threadWriter = new OutputWriter(synchronizedFile); degapData* dataBundle = new degapData(filename, lines[0].start, lines[0].end, threadWriter); driverDegap(dataBundle); num = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->threadWriter; delete data[i]; delete workerThreads[i]; } synchronizedFile->close(); delete threadWriter; delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, 
"DegapSeqsCommand", "createProcesses"); exit(1); } }
//*************************************************************************************************************** int Bellerophon::getChimeras() { try { //create breaking points vector<int> midpoints; midpoints.resize(iters, window); for (int i = 1; i < iters; i++) { midpoints[i] = midpoints[i-1] + increment; } #ifdef USE_MPI int pid, numSeqsPerProcessor; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); numSeqsPerProcessor = iters / processors; //each process hits this only once unsigned long long startPos = pid * numSeqsPerProcessor; if(pid == processors - 1){ numSeqsPerProcessor = iters - pid * numSeqsPerProcessor; } lines.push_back(linePair(startPos, numSeqsPerProcessor)); //fill pref with scores driverChimeras(midpoints, lines[0]); if (m->control_pressed) { return 0; } //each process must send its parts back to pid 0 if (pid == 0) { //receive results for (int j = 1; j < processors; j++) { vector<string> MPIBestSend; for (int i = 0; i < numSeqs; i++) { if (m->control_pressed) { return 0; } MPI_Status status; //receive string int length; MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status); char* buf = new char[length]; MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); string temp = buf; if (temp.length() > length) { temp = temp.substr(0, length); } delete buf; MPIBestSend.push_back(temp); } fillPref(j, MPIBestSend); if (m->control_pressed) { return 0; } } }else { //takes best window for each sequence and turns Preference to string that can be parsed by pid 0. //played with this a bit, but it may be better to try user-defined datatypes with set string lengths?? 
vector<string> MPIBestSend = getBestWindow(lines[0]); pref.clear(); //send your result to parent for (int i = 0; i < numSeqs; i++) { if (m->control_pressed) { return 0; } int bestLength = MPIBestSend[i].length(); char* buf = new char[bestLength]; memcpy(buf, MPIBestSend[i].c_str(), bestLength); MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD); delete buf; } MPIBestSend.clear(); } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //divide breakpoints between processors #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ lines.push_back(linePair(0, iters)); //fill pref with scores driverChimeras(midpoints, lines[0]); }else{ int numSeqsPerProcessor = iters / processors; for (int i = 0; i < processors; i++) { unsigned long long startPos = i * numSeqsPerProcessor; if(i == processors - 1){ numSeqsPerProcessor = iters - i * numSeqsPerProcessor; } lines.push_back(linePair(startPos, numSeqsPerProcessor)); } createProcesses(midpoints); } #else lines.push_back(linePair(0, iters)); ///fill pref with scores driverChimeras(midpoints, lines[0]); #endif #endif return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "getChimeras"); exit(1); } }
// Records the processor count and output directory, builds the list of group
// combinations to score for tree `t`, splits that list into one linePair per
// processor and returns whatever createProcesses() produces for it.
EstOutput Parsimony::getValues(Tree* t, int p, string o) {
    try {
        processors = p;
        outputDir = o;
        CountTable* ct = t->getCountTable();

        vector<string> mGroups = m->getGroups();
        const int numGroups = mGroups.size();

        //every unordered pair of selected groups, e.g. A,B,C -> BA, CA, CB
        vector< vector<string> > namesOfGroupCombos;
        int pairCount = 0;
        for (int hi = 0; hi < numGroups; hi++) {
            for (int lo = 0; lo < hi; lo++) {
                vector<string> duo(2);
                duo[0] = mGroups[hi];
                duo[1] = mGroups[lo];
                namesOfGroupCombos.push_back(duo);
                pairCount++;
            }
        }

        //unless there is exactly one pair, add one more combo holding all groups
        if (pairCount != 1) {
            vector<string> everyone;
            if (numGroups != 0) {
                everyone = mGroups;
            }else {
                //no groups were selected: use the count table's groups, skipping "xxx"
                vector<string> tGroups = ct->getNamesOfGroups();
                for (int g = 0; g < tGroups.size(); g++) {
                    if (tGroups[g] != "xxx") { everyone.push_back(tGroups[g]); }
                }
            }
            namesOfGroupCombos.push_back(everyone);
        }

        //hand out the combos: each worker but the last gets
        //unassigned / workersLeft (integer division), the last gets the rest
        lines.clear();
        int unassigned = namesOfGroupCombos.size();
        int nextStart = 0;
        int workersLeft = processors;
        while (workersLeft > 0) {
            const int share = (workersLeft == 1) ? unassigned : (unassigned / workersLeft);
            lines.push_back(linePair(nextStart, share)); //startIndex, numPairs
            nextStart += share;
            unassigned -= share;
            workersLeft--;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Parsimony", "getValues");
        exit(1);
    }
}
int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string accnos, string filename) { try { //create array of worker threads vector<thread*> workerThreads; vector<classifyData*> data; long long num = 0; time_t start, end; time(&start); vector<unsigned long long> positions; vector<linePair> lines; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosFasta(filename, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif auto synchronizedAccnosFile = std::make_shared<SynchronizedOutputFile>(accnos); auto synchronizedTaxFile = std::make_shared<SynchronizedOutputFile>(taxFileName); auto synchronizedTaxTFile = std::make_shared<SynchronizedOutputFile>(tempTaxFile); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadTaxWriter = new OutputWriter(synchronizedTaxFile); OutputWriter* threadTaxTWriter = new OutputWriter(synchronizedTaxTFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedAccnosFile); classifyData* dataBundle = new classifyData(threadAccnosWriter, probs, threadTaxWriter, threadTaxTWriter, filename, lines[i+1].start, lines[i+1].end, flip, classify); data.push_back(dataBundle); workerThreads.push_back(new thread(driverClassifier, dataBundle)); } OutputWriter* threadTaxWriter = new OutputWriter(synchronizedTaxFile); OutputWriter* threadTaxTWriter = new OutputWriter(synchronizedTaxTFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedAccnosFile); classifyData* dataBundle = new 
classifyData(threadAccnosWriter, probs, threadTaxWriter, threadTaxTWriter, filename, lines[0].start, lines[0].end, flip, classify); driverClassifier(dataBundle); num = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->taxTWriter; delete data[i]->taxWriter; delete data[i]->accnosWriter; delete data[i]; delete workerThreads[i]; } synchronizedTaxTFile->close(); synchronizedTaxFile->close(); synchronizedAccnosFile->close(); delete threadTaxWriter; delete threadTaxTWriter; delete threadAccnosWriter; delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to classify " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "createProcesses"); exit(1); } }
//void alignDriver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename, vector<long long>& numFlipped,MothurOut* m, string align, float match, float misMatch, float gapOpen, float gapExtend, float threshold, bool flip, AlignmentDB* templateDB, string search, long long& count) { long long AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename, vector<long long>& numFlipped) { try { vector<linePair> lines; vector<unsigned long long> positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFastaSeqs = 0; positions = util.setFilePosFasta(filename, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; m->mothurOut("Reducing processors to " + toString(numFastaSeqs) + ".\n"); } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector<thread*> workerThreads; vector<alignStruct*> data; long long num = 0; for (int i = 0; i < numFlipped.size(); i++) { numFlipped[i] = 0; } time_t start, end; time(&start); NastReport nast; string nastHeaders = nast.getHeaders(); ofstream out; util.openOutputFile(reportFileName, out); out << nastHeaders; out.close(); auto synchronizedOutputAlignFile = std::make_shared<SynchronizedOutputFile>(alignFileName); auto synchronizedOutputReportFile = std::make_shared<SynchronizedOutputFile>(reportFileName, true); auto synchronizedOutputAccnosFile = std::make_shared<SynchronizedOutputFile>(accnosFName); for (int i = 0; i < processors-1; i++) { 
OutputWriter* threadAlignWriter = new OutputWriter(synchronizedOutputAlignFile); OutputWriter* threadReportWriter = new OutputWriter(synchronizedOutputReportFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedOutputAccnosFile); alignStruct* dataBundle = new alignStruct(lines[i+1], threadAlignWriter, threadReportWriter, threadAccnosWriter, filename, align, match, misMatch, gapOpen, gapExtend, threshold, flip, templateDB, search); data.push_back(dataBundle); workerThreads.push_back(new thread(alignDriver, dataBundle)); } OutputWriter* threadAlignWriter = new OutputWriter(synchronizedOutputAlignFile); OutputWriter* threadReportWriter = new OutputWriter(synchronizedOutputReportFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedOutputAccnosFile); alignStruct* dataBundle = new alignStruct(lines[0], threadAlignWriter, threadReportWriter, threadAccnosWriter, filename, align, match, misMatch, gapOpen, gapExtend, threshold, flip, templateDB, search); alignDriver(dataBundle); numFlipped[0] = dataBundle->flippedResults[0]; numFlipped[1] = dataBundle->flippedResults[1]; num = dataBundle->numSeqs; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->numSeqs; numFlipped[0] += data[i]->flippedResults[0]; numFlipped[1] += data[i]->flippedResults[1]; delete data[i]->alignWriter; delete data[i]->reportWriter; delete data[i]->accnosWriter; delete data[i]; delete workerThreads[i]; } synchronizedOutputAlignFile->close(); synchronizedOutputReportFile->close(); synchronizedOutputAccnosFile->close(); delete threadAlignWriter; delete threadAccnosWriter; delete threadReportWriter; delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to align " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "AlignCommand", "createProcesses"); exit(1); } }
// Runs driver() over the slices stored in `lines`, one slice per worker, and
// returns the concatenated results (slice 0 first, then the workers in the
// order they were started).
//
// Unix-like builds: workers are fork()ed children. Each child writes its
// result count followed by the tab-separated values to
// outputDir + <pid> + ".unweighted.results.temp"; the parent reads those files
// back and deletes them. If fork() fails, `processors` is reduced to the
// number that could be started, the group combinations and `lines` are rebuilt
// and the spawn is retried once; a second failure terminates the program.
//
// Other builds: workers are threads created with CreateThread, each given its
// own copy of the tree and count table.
//
// t                   - tree passed through to driver()
// namesOfGroupCombos  - group combinations passed through to driver()
// ct                  - count table passed through to driver()
EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
    try {
        int process = 1;
        vector<int> processIDS;
        bool recalc = false;

        EstOutput results;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)

        //loop through and create all the processes you want
        while (process != processors) {
            pid_t pid = fork();

            if (pid > 0) {
                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                process++;
            }else if (pid == 0){
                EstOutput myresults;
                myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);

                if (m->control_pressed) { exit(0); }

                //pass numSeqs to parent
                ofstream out;
                string tempFile = outputDir + m->mothurGetpid(process) + ".unweighted.results.temp";
                m->openOutputFile(tempFile, out);
                out << myresults.size() << endl;
                for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
                out.close();

                exit(0);
            }else {
                //fork failed: stop the children started so far, clean up their
                //temp files and fall through to the retry below
                m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                //wait to die
                for (int i=0;i<processIDS.size();i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }
                m->control_pressed = false;
                for (int i=0;i<processIDS.size();i++) {
                    m->mothurRemove(outputDir + (toString(processIDS[i]) + ".unweighted.results.temp"));
                }
                recalc = true;
                break;
            }
        }

        if (recalc) {
            //test line, also set recalc to true.
            //for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for (int i=0;i<processIDS.size();i++) {m->mothurRemove(outputDir + (toString(processIDS[i]) + ".unweighted.results.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n");

            //if the users enters no groups then give them the score of all groups
            int numGroups = m->getNumGroups();

            //calculate number of comparsions
            //NOTE: this local deliberately has the same name as the parameter
            //and hides it for the rest of this retry block
            int numComp = 0;
            vector< vector<string> > namesOfGroupCombos;
            for (int r=0; r<numGroups; r++) {
                for (int l = 0; l < r; l++) {
                    numComp++;
                    vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
                    namesOfGroupCombos.push_back(groups);
                }
            }

            if (numComp != 1) {
                vector<string> groups;
                if (numGroups == 0) {
                    //get score for all users groups
                    for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
                        if ((ct->getNamesOfGroups())[i] != "xxx") {
                            groups.push_back((ct->getNamesOfGroups())[i]);
                        }
                    }
                    namesOfGroupCombos.push_back(groups);
                }else {
                    for (int i = 0; i < m->getNumGroups(); i++) {
                        groups.push_back((m->getGroups())[i]);
                    }
                    namesOfGroupCombos.push_back(groups);
                }
            }

            //re-split the combos for the reduced processor count
            lines.clear();
            int remainingPairs = namesOfGroupCombos.size();
            int startIndex = 0;
            for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
                int numPairs = remainingPairs; //case for last processor
                if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
                lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
                startIndex = startIndex + numPairs;
                remainingPairs = remainingPairs - numPairs;
            }

            results.clear();
            processIDS.resize(0);
            process = 1;

            //loop through and create all the processes you want
            while (process != processors) {
                pid_t pid = fork();

                if (pid > 0) {
                    processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                    process++;
                }else if (pid == 0){
                    EstOutput myresults;
                    myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);

                    if (m->control_pressed) { exit(0); }

                    //pass numSeqs to parent
                    ofstream out;
                    string tempFile = outputDir + m->mothurGetpid(process) + ".unweighted.results.temp";
                    m->openOutputFile(tempFile, out);
                    out << myresults.size() << endl;
                    for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
                    out.close();

                    exit(0);
                }else {
                    //second failure: give up
                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                    exit(0);
                }
            }
        }

        //the parent scores slice 0 itself
        results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);

        //force parent to wait until all the processes are done
        for (int i=0;i<(processors-1);i++) {
            int temp = processIDS[i];
            wait(&temp);
        }

        if (m->control_pressed) { return results; }

        //get data created by processes
        for (int i=0;i<(processors-1);i++) {
            ifstream in;
            string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
            m->openInputFile(s, in);

            //get quantiles
            if (!in.eof()) {
                int num;
                in >> num; m->gobble(in);

                if (m->control_pressed) { break; }

                double w;
                for (int j = 0; j < num; j++) {
                    in >> w;
                    results.push_back(w);
                }
                m->gobble(in);
            }
            in.close();
            m->mothurRemove(s);
        }
#else
        //fill in functions
        vector<unweightedData*> pDataArray;
        DWORD   dwThreadIdArray[processors-1];
        HANDLE  hThreadArray[processors-1];
        vector<CountTable*> cts;
        vector<Tree*> trees;

        //Create processor worker threads.
        for( int i=1; i<processors; i++ ){
            //each thread works on its own copy of the count table and tree
            CountTable* copyCount = new CountTable();
            copyCount->copy(ct);
            Tree* copyTree = new Tree(copyCount);
            copyTree->getCopy(t);

            cts.push_back(copyCount);
            trees.push_back(copyTree);

            unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
            pDataArray.push_back(tempweighted);
            processIDS.push_back(i);

            hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
        }

        results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);

        //Wait until all threads have terminated.
        WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);

        //Close all thread handles and free memory allocations.
        for(int i=0; i < pDataArray.size(); i++){
            for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
            delete cts[i];
            delete trees[i];
            CloseHandle(hThreadArray[i]);
            delete pDataArray[i];
        }
#endif
        return results;
    }
    catch(exception& e) {
        m->errorOut(e, "Unweighted", "createProcesses");
        exit(1);
    }
}
// Records the processor count and output directory, builds the list of group
// combinations to score for tree `t`, splits that list into one linePair per
// processor and returns whatever createProcesses() produces for it. `lines`
// is cleared again before returning.
EstOutput Unweighted::getValues(Tree* t, int p, string o) {
    try {
        processors = p;
        outputDir = o;

        CountTable* ct = t->getCountTable();

        //if the users enters no groups then give them the score of all groups
        int numGroups = m->getNumGroups();

        //fetch the group list once instead of on every loop iteration
        vector<string> mGroups = m->getGroups();

        //calculate number of comparsions: one entry per unordered pair of groups
        int numComp = 0;
        vector< vector<string> > namesOfGroupCombos;
        for (int r=0; r<numGroups; r++) {
            for (int l = 0; l < r; l++) {
                numComp++;
                vector<string> groups; groups.push_back(mGroups[r]); groups.push_back(mGroups[l]);
                namesOfGroupCombos.push_back(groups);
            }
        }

        //unless there is exactly one pair, add one more combo holding all groups
        if (numComp != 1) {
            vector<string> groups;
            if (numGroups == 0) {
                //get score for all users groups, skipping the "xxx" entry
                vector<string> tGroups = ct->getNamesOfGroups();
                for (int i = 0; i < tGroups.size(); i++) {
                    if (tGroups[i] != "xxx") {
                        groups.push_back(tGroups[i]);
                    }
                }
                namesOfGroupCombos.push_back(groups);
            }else {
                for (int i = 0; i < numGroups; i++) {
                    groups.push_back(mGroups[i]);
                }
                namesOfGroupCombos.push_back(groups);
            }
        }

        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            //NOTE: both operands are int, so the division truncates before
            //ceil() sees it and ceil() leaves the value unchanged
            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Unweighted", "getValues");
        exit(1);
    }
}
bool ChopSeqsCommand::createProcesses(vector<linePair> lines, string filename, string outFasta, string outAccnos, string fastafileTemp) { try { int process = 1; bool wroteAccnos = false; vector<int> processIDS; vector<string> nonBlankAccnosFiles; bool recalc = false; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ string fastafileTempThisProcess = fastafileTemp; if (fastafileTempThisProcess != "") { fastafileTempThisProcess = fastafileTempThisProcess + m->mothurGetpid(process) + ".temp"; } wroteAccnos = driver(lines[process], filename, outFasta + m->mothurGetpid(process) + ".temp", outAccnos + m->mothurGetpid(process) + ".temp", fastafileTempThisProcess); //pass numSeqs to parent ofstream out; string tempFile = fastafile + m->mothurGetpid(process) + ".bool.temp"; m->openOutputFile(tempFile, out); out << wroteAccnos << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process; for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } //wait to die for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; recalc = true; break; } } if (recalc) { //test line, also set recalc to true. 
//for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n"); lines.clear(); vector<unsigned long long> positions = m->divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } processIDS.resize(0); process = 1; while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ string fastafileTempThisProcess = fastafileTemp; if (fastafileTempThisProcess != "") { fastafileTempThisProcess = fastafileTempThisProcess + m->mothurGetpid(process) + ".temp"; } wroteAccnos = driver(lines[process], filename, outFasta + m->mothurGetpid(process) + ".temp", outAccnos + m->mothurGetpid(process) + ".temp", fastafileTempThisProcess); //pass numSeqs to parent ofstream out; string tempFile = fastafile + m->mothurGetpid(process) + ".bool.temp"; m->openOutputFile(tempFile, out); out << wroteAccnos << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } } //do your part wroteAccnos = driver(lines[0], filename, outFasta, outAccnos, fastafileTemp); //force parent to wait until all the processes are done for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } if (wroteAccnos) { nonBlankAccnosFiles.push_back(outAccnos); } else { m->mothurRemove(outAccnos); } //remove so other files can be renamed to it //parent reads in and combine Filter info for (int i = 0; i < processIDS.size(); i++) { string tempFilename = 
fastafile + toString(processIDS[i]) + ".bool.temp"; ifstream in; m->openInputFile(tempFilename, in); bool temp; in >> temp; m->gobble(in); if (temp) { wroteAccnos = temp; nonBlankAccnosFiles.push_back(outAccnos + toString(processIDS[i]) + ".temp"); } else { m->mothurRemove((outAccnos + toString(processIDS[i]) + ".temp")); } in.close(); m->mothurRemove(tempFilename); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the seqSumData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, //Taking advantage of shared memory to allow both threads to add info to vectors. ////////////////////////////////////////////////////////////////////////////////////////////////////// vector<chopData*> pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; //Create processor worker threads. for( int i=0; i<processors-1; i++ ){ string extension = ""; if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); } // Allocate memory for thread data. string fastafileTempThisProcess = fastafileTemp; if (fastafileTempThisProcess != "") { fastafileTempThisProcess = fastafileTempThisProcess + extension; } chopData* tempChop = new chopData(filename, (outFasta+extension), (outAccnos+extension), m, lines[i].start, lines[i].end, keep, countGaps, numbases, Short, keepN, qualfile, fastafileTempThisProcess); pDataArray.push_back(tempChop); //MyChopThreadFunction is in header. It must be global or static to work with the threads. 
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier hThreadArray[i] = CreateThread(NULL, 0, MyChopThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); } //do your part string fastafileTempThisProcess = fastafileTemp; if (fastafileTempThisProcess != "") { fastafileTempThisProcess = fastafileTempThisProcess + toString(processors-1) + ".temp"; } wroteAccnos = driver(lines[processors-1], filename, (outFasta + toString(processors-1) + ".temp"), (outAccnos + toString(processors-1) + ".temp"), fastafileTempThisProcess); processIDS.push_back(processors-1); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); if (wroteAccnos) { nonBlankAccnosFiles.push_back(outAccnos); } else { m->mothurRemove(outAccnos); } //remove so other files can be renamed to it //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ if (pDataArray[i]->wroteAccnos) { wroteAccnos = pDataArray[i]->wroteAccnos; nonBlankAccnosFiles.push_back(outAccnos + toString(processIDS[i]) + ".temp"); } else { m->mothurRemove((outAccnos + toString(processIDS[i]) + ".temp")); } //check to make sure the process finished if (pDataArray[i]->count != pDataArray[i]->end) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. 
\n"); m->control_pressed = true; } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif for (int i = 0; i < processIDS.size(); i++) { if (fastafileTemp != "") { m->appendFiles((fastafileTemp + toString(processIDS[i]) + ".temp"), fastafileTemp); m->mothurRemove((fastafileTemp + toString(processIDS[i]) + ".temp")); } m->appendFiles((outFasta + toString(processIDS[i]) + ".temp"), outFasta); m->mothurRemove((outFasta + toString(processIDS[i]) + ".temp")); } if (nonBlankAccnosFiles.size() != 0) { m->renameFile(nonBlankAccnosFiles[0], outAccnos); for (int h=1; h < nonBlankAccnosFiles.size(); h++) { m->appendFiles(nonBlankAccnosFiles[h], outAccnos); m->mothurRemove(nonBlankAccnosFiles[h]); } }else { //recreate the accnosfile if needed ofstream out; m->openOutputFile(outAccnos, out); out.close(); } return wroteAccnos; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "createProcesses"); exit(1); } }
int ChopSeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } map<string, string> variables; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)); string outputFileName = getOutputFileName("fasta", variables); outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); string outputFileNameAccnos = getOutputFileName("accnos", variables); string fastafileTemp = ""; if (qualfile != "") { fastafileTemp = outputFileName + ".qualFile.Positions.temp"; } vector<unsigned long long> positions; vector<linePair> lines; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else int numSeqs = 0; positions = m->setFilePosFasta(fastafile, numSeqs); if (numSeqs < processors) { processors = numSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif bool wroteAccnos = false; if(processors == 1) { wroteAccnos = driver(lines[0], fastafile, outputFileName, outputFileNameAccnos, fastafileTemp); } else { wroteAccnos = createProcesses(lines, fastafile, outputFileName, outputFileNameAccnos, fastafileTemp); } if (m->control_pressed) { return 0; } if (qualfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)); string outputQualFileName = getOutputFileName("qfile", 
variables); outputNames.push_back(outputQualFileName); outputTypes["qfile"].push_back(outputQualFileName); processQual(outputQualFileName, fastafileTemp); m->mothurRemove(fastafileTemp); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (wroteAccnos) { outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); //use remove.seqs to create new name, group and count file if ((countfile != "") || (namefile != "") || (groupfile != "")) { string inputString = "accnos=" + outputFileNameAccnos; if (countfile != "") { inputString += ", count=" + countfile; } else{ if (namefile != "") { inputString += ", name=" + namefile; } if (groupfile != "") { inputString += ", group=" + groupfile; } } m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map<string, vector<string> > filenames = removeCommand->getOutputFiles(); delete removeCommand; m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); if (groupfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); string outGroup = getOutputFileName("group", variables); m->renameFile(filenames["group"][0], outGroup); outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup); } if (namefile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(namefile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile)); string outName = getOutputFileName("name", variables); m->renameFile(filenames["name"][0], outName); 
outputNames.push_back(outName); outputTypes["name"].push_back(outName); } if (countfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); string outCount = getOutputFileName("count", variables); m->renameFile(filenames["count"][0], outCount); outputNames.push_back(outCount); outputTypes["count"].push_back(outCount); } } } else { m->mothurRemove(outputFileNameAccnos); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } if (wroteAccnos) { //set accnos file as new current accnosfile itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "execute"); exit(1); } }