//********************************************************************************************************************** int MergeGroupsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) { try { vector<SharedRAbundVector*> newLookup; for (int i = 0; i < thislookup.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(thislookup[i]->getLabel()); temp->setGroup(thislookup[i]->getGroup()); newLookup.push_back(temp); } //for each bin vector<string> newBinLabels; string snumBins = toString(thislookup[0]->getNumBins()); for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } //look at each sharedRabund and make sure they are not all zero bool allZero = true; for (int j = 0; j < thislookup.size(); j++) { if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } } //if they are not all zero add this bin if (!allZero) { for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } //if there is a bin label use it otherwise make one string binLabel = "Otu"; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup.clear(); thislookup = newLookup; m->currentSharedBinLabels = newBinLabels; return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "eliminateZeroOTUS"); exit(1); } }
//********************************************************************************************************************** int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) { try { vector<SharedRAbundVector*> newLookup; for (int i = 0; i < thislookup.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(thislookup[i]->getLabel()); temp->setGroup(thislookup[i]->getGroup()); newLookup.push_back(temp); } //for each bin for (int i = 0; i < thislookup[0]->getNumBins(); i++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } //look at each sharedRabund and make sure they are not all zero bool allZero = true; for (int j = 0; j < thislookup.size(); j++) { if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } } //if they are not all zero add this bin if (!allZero) { for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); } //if there is a bin label use it otherwise make one } //else{ cout << "bin # " << i << " is all zeros" << endl; } } for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "eliminateZeroOTUS"); exit(1); } }
/**
 * Expands this vector into a SharedRAbundVector.
 *
 * For every index idx >= 1 of data, the value idx is appended
 * data[idx].abundance times (tagged with data[idx].group). The result is then
 * sorted through its reverse iterators with compareMembers and stamped with
 * this object's label and group.
 *
 * @return the expanded vector, by value.
 */
SharedRAbundVector SharedSAbundVector::getSharedRAbundVector(){
    try {
        SharedRAbundVector expanded;

        //slot 0 is skipped on purpose: the loop starts at 1
        for (int idx = 1; idx < data.size(); idx++) {
            for (int copy = 0; copy < data[idx].abundance; copy++) {
                expanded.push_back(idx, data[idx].group);
            }
        }

        sort(expanded.rbegin(), expanded.rend(), compareMembers);

        expanded.setLabel(label);
        expanded.setGroup(group);

        return expanded;
    }
    catch(exception& e) {
        m->errorOut(e, "SharedSAbundVector", "getSharedRAbundVector");
        exit(1);
    }
}
/**
 * Runs driver() over the group combinations, optionally split across several
 * workers.
 *
 * - processors == 1: a single driver(0, namesOfGroupCombos.size(), thisLookUp) call.
 * - unix-like builds: forks processors-1 children; child number p runs driver on
 *   lines[p] and exits, the parent runs lines[0] and then calls wait() once per child.
 * - other builds (Windows): gives each worker thread its own deep copy of
 *   thisLookUp, runs lines[0] on the calling thread, waits for all threads and
 *   then merges their outputNames into outputNames / outputTypes["metastats"].
 *
 * @param thisLookUp  abundance vectors handed to driver(); not modified here.
 * @return always 0. Failures are reported through m->mothurOut / m->errorOut;
 *         an exception or a failed fork() terminates the process via exit().
 */
int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
    try {
        if(processors == 1){
            driver(0, namesOfGroupCombos.size(), thisLookUp);
        }else{
            int process = 1;
            vector<int> processIDS;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
            //loop through and create all the processes you want
            while (process != processors) {
                pid_t pid = fork();

                if (pid > 0) {
                    //parent: remember the child and move on to the next slice
                    processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
                    process++;
                }else if (pid == 0){
                    //child: work on its own slice, then leave without returning to the caller
                    driver(lines[process].start, lines[process].num, thisLookUp);
                    exit(0);
                }else {
                    //fork failed: interrupt the children started so far and give up
                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                    exit(0);
                }
            }

            //do my part
            driver(lines[0].start, lines[0].num, thisLookUp);

            //force parent to wait until all the processes are done
            //NOTE(review): nothing here reads results back from the children - presumably
            //driver() leaves its results on disk; confirm how their output files get registered.
            for (int i=0;i<(processors-1);i++) {
                int temp = processIDS[i];
                wait(&temp);
            }
#else
            //////////////////////////////////////////////////////////////////////////////////////////////////////
            //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
            //Above fork() will clone, so memory is separate, but that's not the case with windows,
            //Taking advantage of shared memory to pass results vectors.
            //////////////////////////////////////////////////////////////////////////////////////////////////////

            vector<metastatsData*> pDataArray;
            //NOTE(review): arrays sized by a runtime value are not standard C++ - relies on a compiler extension.
            DWORD dwThreadIdArray[processors-1];
            HANDLE hThreadArray[processors-1];

            //Create processor worker threads.
            for( int i=1; i<processors; i++ ){

                //make copy of lookup so we don't get access violations
                vector<SharedRAbundVector*> newLookup;
                vector<string> designMapGroups;
                for (int k = 0; k < thisLookUp.size(); k++) {
                    SharedRAbundVector* temp = new SharedRAbundVector();
                    temp->setLabel(thisLookUp[k]->getLabel());
                    temp->setGroup(thisLookUp[k]->getGroup());
                    newLookup.push_back(temp);
                    designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
                }

                //for each bin
                for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
                    //NOTE(review): this early return frees only the copy being built; threads and
                    //pDataArray entries created by earlier passes of the outer loop are not cleaned up here.
                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
                    for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
                }

                // Allocate memory for thread data.
                metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
                pDataArray.push_back(tempSum);
                processIDS.push_back(i);

                //thread i uses slot i-1 of pDataArray / hThreadArray / dwThreadIdArray
                hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
            }

            //do my part
            driver(lines[0].start, lines[0].num, thisLookUp);

            //Wait until all threads have terminated.
            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);

            //Close all thread handles and free memory allocations.
            for(int i=0; i < pDataArray.size(); i++){
                //a worker that did not finish its whole slice flags the run as cancelled
                if (pDataArray[i]->count != (pDataArray[i]->num)) {
                    m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
                }
                //release the worker's private copy of the lookup
                for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; }
                //adopt the files the worker reported
                for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
                    outputNames.push_back(pDataArray[i]->outputNames[j]);
                    outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
                }

                CloseHandle(hThreadArray[i]);
                delete pDataArray[i];
            }
#endif
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MetaStatsCommand", "process");
        exit(1);
    }
}
/**
 * Computes pairwise distances between the groups in thisLookup for every
 * calculator in matrixCalculators and writes them as phylip files.
 *
 * The main loop runs iters+1 times. Iterations that are not subsampled
 * (iteration 0, or every iteration when subsample is false) print one matrix
 * per calculator straight away. Subsampled iterations (subsample set and
 * thisIter != 0) work on a subsampled deep copy of the lookup and only store
 * their distances in calcDistsTotals. After the loop, when iters != 0, an
 * average ("ave") and a standard deviation ("std") matrix are written per
 * calculator and the "ave" file becomes the current phylip file.
 *
 * With processors > 1 the distance computation is split across forked
 * children (unix-like builds, results handed back through temporary .dist
 * files) or worker threads (other builds).
 *
 * @param thisLookup  abundance vectors, one per group (the vector is taken by
 *                    value; the pointed-to objects are not deleted here).
 * @return always 0, including when the run is cancelled via m->control_pressed.
 */
int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
    try {
        vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
        vector< vector<seqDist> > calcDists; calcDists.resize(matrixCalculators.size());

        for (int thisIter = 0; thisIter < iters+1; thisIter++) {
            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
            variables["[distance]"] = thisLookup[0]->getLabel();
            variables["[tag2]"] = "";

            //starts out sharing the caller's objects; replaced by a private copy when subsampling
            vector<SharedRAbundVector*> thisItersLookup = thisLookup;

            if (subsample && (thisIter != 0)) {
                SubSample sample;
                vector<string> tempLabels; //don't need since we aren't printing the sampled sharedRabunds

                //make copy of lookup so we don't get access violations
                vector<SharedRAbundVector*> newLookup;
                for (int k = 0; k < thisItersLookup.size(); k++) {
                    SharedRAbundVector* temp = new SharedRAbundVector();
                    temp->setLabel(thisItersLookup[k]->getLabel());
                    temp->setGroup(thisItersLookup[k]->getGroup());
                    newLookup.push_back(temp);
                }

                //for each bin
                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                }

                tempLabels = sample.getSample(newLookup, subsampleSize);
                thisItersLookup = newLookup;
            }

            if(processors == 1){
                driver(thisItersLookup, 0, numGroups, calcDists);
            }else{
                int process = 1;
                vector<int> processIDS;

#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                    pid_t pid = fork();

                    if (pid > 0) {
                        processIDS.push_back(pid);
                        process++;
                    }else if (pid == 0){
                        //child: compute its slice, dump the results to a temp file, then exit
                        driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);

                        //NOTE(review): the parent looks this file up by the child's pid (see below), so
                        //mothurGetpid(process) presumably yields that pid as a string - confirm.
                        string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
                        ofstream outtemp;
                        m->openOutputFile(tempdistFileName, outtemp);

                        //per calculator: a count line followed by "seq1 <tab> seq2 <tab> dist" lines
                        for (int i = 0; i < calcDists.size(); i++) {
                            outtemp << calcDists[i].size() << endl;

                            for (int j = 0; j < calcDists[i].size(); j++) {
                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
                            }
                        }
                        outtemp.close();

                        exit(0);
                    }else {
                        //fork failed: interrupt the children started so far and give up
                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                        exit(0);
                    }
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //force parent to wait until all the processes are done
                for (int i = 0; i < processIDS.size(); i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }

                //read every child's temp file back in (same layout as written above) and remove it
                for (int i = 0; i < processIDS.size(); i++) {
                    string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) + ".dist";
                    ifstream intemp;
                    m->openInputFile(tempdistFileName, intemp);

                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = 0;
                        intemp >> size; m->gobble(intemp);

                        for (int j = 0; j < size; j++) {
                            int seq1 = 0;
                            int seq2 = 0;
                            float dist = 1.0;

                            intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);

                            seqDist tempDist(seq1, seq2, dist);
                            calcDists[k].push_back(tempDist);
                        }
                    }
                    intemp.close();
                    m->mothurRemove(tempdistFileName);
                }
#else
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the distSharedData struct.
                //Above fork() will clone, so memory is separate, but that's not the case with windows,
                //Taking advantage of shared memory to pass results vectors.
                //////////////////////////////////////////////////////////////////////////////////////////////////////

                vector<distSharedData*> pDataArray;
                //NOTE(review): arrays sized by a runtime value are not standard C++ - relies on a compiler extension.
                DWORD dwThreadIdArray[processors-1];
                HANDLE hThreadArray[processors-1];

                //Create processor worker threads.
                for( int i=1; i<processors; i++ ){

                    //make copy of lookup so we don't get access violations
                    vector<SharedRAbundVector*> newLookup;
                    for (int k = 0; k < thisItersLookup.size(); k++) {
                        SharedRAbundVector* temp = new SharedRAbundVector();
                        temp->setLabel(thisItersLookup[k]->getLabel());
                        temp->setGroup(thisItersLookup[k]->getGroup());
                        newLookup.push_back(temp);
                    }

                    //for each bin
                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                        //NOTE(review): this early return frees only the copy being built; threads and
                        //pDataArray entries created by earlier passes of the outer loop are not cleaned up here.
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                    }

                    // Allocate memory for thread data.
                    distSharedData* tempSum = new distSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
                    pDataArray.push_back(tempSum);
                    processIDS.push_back(i);

                    //thread i uses slot i-1 of pDataArray / hThreadArray / dwThreadIdArray
                    hThreadArray[i-1] = CreateThread(NULL, 0, MyDistSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);

                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                    //a worker that did not finish its whole slice flags the run as cancelled
                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
                    }
                    //release the worker's private copy of the lookup
                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }

                    //append the worker's distances to ours
                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = pDataArray[i]->calcDists[k].size();
                        for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
                    }

                    CloseHandle(hThreadArray[i]);
                    delete pDataArray[i];
                }
#endif
            }

            if (subsample && (thisIter != 0)) {
                //subsampled iteration: keep the distances for the ave/std pass after the loop
                if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); }
                calcDistsTotals.push_back(calcDists);
                for (int i = 0; i < calcDists.size(); i++) {
                    for (int j = 0; j < calcDists[i].size(); j++) {
                        if (m->debug) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n"); }
                    }
                }
                //clean up memory
                //(thisItersLookup holds this iteration's private subsampled copy here, not the caller's objects)
                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
                thisItersLookup.clear();
            }else { //print results for whole dataset
                for (int i = 0; i < calcDists.size(); i++) {
                    if (m->control_pressed) { break; }

                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup.size());
                    for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }

                    //each pair is stored once; mirror it so the matrix is symmetric
                    for (int j = 0; j < calcDists[i].size(); j++) {
                        int row = calcDists[i][j].seq1;
                        int column = calcDists[i][j].seq2;
                        double dist = calcDists[i][j].dist;

                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }

                    variables["[outputtag]"] = output;
                    variables["[calc]"] = matrixCalculators[i]->getName();
                    string distFileName = getOutputFileName("phylip",variables);
                    outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);

                    ofstream outDist;
                    m->openOutputFile(distFileName, outDist);
                    outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);

                    printSims(outDist, matrix);

                    outDist.close();
                }
            }

            //start the next iteration with empty per-calculator result lists
            for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
        }

        if (iters != 0) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals, mode);

            //find standard deviation
            vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);

            //print results
            for (int i = 0; i < calcDists.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }

                vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
                stdmatrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }

                //stdDev is read with the same [i][j] index as calcAverages
                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;
                    float stdDist = stdDev[i][j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                    stdmatrix[row][column] = stdDist;
                    stdmatrix[column][row] = stdDist;
                }

                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[outputtag]"] = output;
                variables["[tag2]"] = "ave";
                variables["[calc]"] = matrixCalculators[i]->getName();
                string distFileName = getOutputFileName("phylip",variables);
                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);

                //set current phylip file to average distance matrix
                m->setPhylipFile(distFileName);
                ofstream outAve;
                m->openOutputFile(distFileName, outAve);
                outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);

                printSims(outAve, matrix);

                outAve.close();

                //same file name pattern, with the tag switched to "std"
                variables["[tag2]"] = "std";
                distFileName = getOutputFileName("phylip",variables);
                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);

                ofstream outSTD;
                m->openOutputFile(distFileName, outSTD);
                outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);

                printSims(outSTD, stdmatrix);

                outSTD.close();
            }
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MatrixOutputCommand", "process");
        exit(1);
    }
}
int MergeGroupsCommand::process(SharedRAbundVectors*& thisLookUp, ofstream& out, bool& printHeaders){ try { vector<string> setNames = designMap->getCategory(); //create sharedRabundVectors vector<SharedRAbundVector*> data = thisLookUp->getSharedRAbundVectors(); //create SharedRAbundVectors for the merged groups. Fill with blank rabundFloatVectors SharedRAbundVectors* merged; merged = new SharedRAbundVectors(); for (int i = 0; i < setNames.size(); i++) { SharedRAbundVector* myLookup = new SharedRAbundVector(thisLookUp->getNumBins()); myLookup->setLabel(thisLookUp->getLabel()); myLookup->setGroup(setNames[i]); merged->push_back(myLookup); } //for each OTU for (int j = 0; j < data[0]->getNumBins(); j++) { if (m->getControl_pressed()) { break; } map<string, vector<int> > otusGroupAbunds; map<string, vector<int> >::iterator itAbunds; //for each sample for (int i = 0; i < data.size(); i++) { string grouping = designMap->get(data[i]->getGroup()); //what set to your belong to if (grouping == "not found") { m->mothurOut("[ERROR]: " + data[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; } else { //Add this OTUs values to sets abunds itAbunds = otusGroupAbunds.find(grouping); if (itAbunds == otusGroupAbunds.end()) { //new group vector<int> temp; temp.push_back(data[i]->get(j)); otusGroupAbunds[grouping] = temp; }else { (itAbunds->second).push_back(data[i]->get(j)); } } } //find results for this bin. Set merged value for this bin in the results for (itAbunds = otusGroupAbunds.begin(); itAbunds != otusGroupAbunds.end(); itAbunds++) { int abund = mergeAbund(itAbunds->second); merged->set(j, abund, itAbunds->first); } } //free memory for (int i = 0; i < data.size(); i++) { delete data[i]; } if (m->getControl_pressed()) { delete merged; return 0; } merged->eliminateZeroOTUS(); // remove any zero OTUs created by median option. 
//print new file merged->print(out, printHeaders); delete merged; return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "process"); exit(1); } }
int Collect::getSharedCurve(float percentFreq = 0.01){ try { vector<SharedRAbundVector*> lookup; map<string, int> indexLookup; vector<SharedRAbundVector*> subset; //create and initialize vector of sharedvectors, one for each group vector<string> mGroups = sharedorder->getGroups(); int numGroups = mGroups.size(); for (int i = 0; i < mGroups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(sharedorder->getNumBins()); temp->setLabel(sharedorder->getLabel()); temp->setGroup(mGroups[i]); indexLookup[mGroups[i]] = i; lookup.push_back(temp); } SharedCollectorsCurveData ccd; //initialize labels for output //makes 'uniqueAB uniqueAC uniqueBC' if your groups are A, B, C getGroupComb(mGroups); for(int i=0;i<displays.size();i++){ ccd.registerDisplay(displays[i]); //adds a display[i] to cdd bool hasLciHci = displays[i]->hasLciHci(); groupLabel = ""; for (int s = 0; s < groupComb.size(); s++) { if (hasLciHci) { groupLabel = groupLabel + label + groupComb[s] + "\t" + label + groupComb[s] + "lci\t" + label + groupComb[s] + "hci\t"; } else{ groupLabel = groupLabel + label + groupComb[s] + "\t"; } } string groupLabelAll = groupLabel + label + "all\t"; if ((displays[i]->isCalcMultiple() ) && (displays[i]->getAll() )) { displays[i]->init(groupLabelAll); } else { displays[i]->init(groupLabel); } } //convert freq percentage to number int increment = 1; if (percentFreq < 1.0) { increment = numSeqs * percentFreq; } else { increment = percentFreq; } //sample all the members for(int i=0;i<numSeqs;i++){ if (m->getControl_pressed()) { for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 1; } //get first sample individual chosen = sharedorder->get(i); int abundance = lookup[indexLookup[chosen.group]]->get(chosen.bin); lookup[indexLookup[chosen.group]]->set(chosen.bin, (abundance + 1)); //calculate at 0 and the given increment if((i == 0) || (i+1) % increment == 0){ //how many comparisons to make i.e. for group a, b, c = ab, ac, bc. 
int n = 1; bool pair = true; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabund vectors subset.push_back(lookup[k]); subset.push_back(lookup[l]); //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < lookup.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(lookup[w]); } } ccd.updateSharedData(subset, i+1, numGroups, pair, mGroups); } n++; } //if this is a calculator that can do multiples then do them pair = false; ccd.updateSharedData(lookup, i+1, numGroups, pair, mGroups); } totalNumSeq = i+1; } //calculate last label if you haven't already if(numSeqs % increment != 0){ //how many comparisons to make i.e. for group a, b, c = ab, ac, bc. int n = 1; bool pair = true; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabund vectors subset.push_back(lookup[k]); subset.push_back(lookup[l]); //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < lookup.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(lookup[w]); } } ccd.updateSharedData(subset, totalNumSeq, numGroups, pair, mGroups); } n++; } //if this is a calculator that can do multiples then do them pair = false; ccd.updateSharedData(lookup, totalNumSeq, numGroups, pair, mGroups); } //resets output files for(int i=0;i<displays.size();i++){ displays[i]->reset(); } //memory cleanup for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } catch(exception& e) { m->errorOut(e, "Collect", "getSharedCurve"); exit(1); } }
int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) { try{ vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files vector< vector<seqDist> > calcDists; calcDists.resize(treeCalculators.size()); for (int thisIter = 0; thisIter < iters; thisIter++) { vector<SharedRAbundVector*> thisItersLookup = thisLookup; if (subsample) { SubSample sample; vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds //make copy of lookup so we don't get access violations vector<SharedRAbundVector*> newLookup; for (int k = 0; k < thisItersLookup.size(); k++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(thisItersLookup[k]->getLabel()); temp->setGroup(thisItersLookup[k]->getGroup()); newLookup.push_back(temp); } //for each bin for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } } tempLabels = sample.getSample(newLookup, subsampleSize); thisItersLookup = newLookup; } if(processors == 1){ driver(thisItersLookup, 0, numGroups, calcDists); }else{ int process = 1; vector<int> processIDS; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); process++; }else if (pid == 0){ driver(thisItersLookup, lines[process].start, lines[process].end, calcDists); string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist"; ofstream outtemp; m->openOutputFile(tempdistFileName, outtemp); for (int i = 0; i < calcDists.size(); i++) { outtemp << 
calcDists[i].size() << endl; for (int j = 0; j < calcDists[i].size(); j++) { outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; } } outtemp.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } //parent do your part driver(thisItersLookup, lines[0].start, lines[0].end, calcDists); //force parent to wait until all the processes are done for (int i = 0; i < processIDS.size(); i++) { int temp = processIDS[i]; wait(&temp); } for (int i = 0; i < processIDS.size(); i++) { string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) + ".dist"; ifstream intemp; m->openInputFile(tempdistFileName, intemp); for (int k = 0; k < calcDists.size(); k++) { int size = 0; intemp >> size; m->gobble(intemp); for (int j = 0; j < size; j++) { int seq1 = 0; int seq2 = 0; float dist = 1.0; intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); seqDist tempDist(seq1, seq2, dist); calcDists[k].push_back(tempDist); } } intemp.close(); m->mothurRemove(tempdistFileName); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the treeSharedData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, //Taking advantage of shared memory to pass results vectors. ////////////////////////////////////////////////////////////////////////////////////////////////////// vector<treeSharedData*> pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; //Create processor worker threads. 
for( int i=1; i<processors; i++ ){ //make copy of lookup so we don't get access violations vector<SharedRAbundVector*> newLookup; for (int k = 0; k < thisItersLookup.size(); k++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(thisItersLookup[k]->getLabel()); temp->setGroup(thisItersLookup[k]->getGroup()); newLookup.push_back(temp); } //for each bin for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } } // Allocate memory for thread data. treeSharedData* tempSum = new treeSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup); pDataArray.push_back(tempSum); processIDS.push_back(i); hThreadArray[i-1] = CreateThread(NULL, 0, MyTreeSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); } //parent do your part driver(thisItersLookup, lines[0].start, lines[0].end, calcDists); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. 
\n"); m->control_pressed = true; } for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } for (int k = 0; k < calcDists.size(); k++) { int size = pDataArray[i]->calcDists[k].size(); for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); } } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif } calcDistsTotals.push_back(calcDists); if (subsample) { //clean up memory for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; } thisItersLookup.clear(); for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } } if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); } } if (m->debug) { m->mothurOut("[DEBUG]: done with iters.\n"); } if (iters != 1) { //we need to find the average distance and standard deviation for each groups distance vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals); if (m->debug) { m->mothurOut("[DEBUG]: found averages.\n"); } //create average tree for each calc for (int i = 0; i < calcDists.size(); i++) { vector< vector<double> > matrix; //square matrix to represent the distance matrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } for (int j = 0; j < calcAverages[i].size(); j++) { int row = calcAverages[i][j].seq1; int column = calcAverages[i][j].seq2; float dist = calcAverages[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; } //create a new filename map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); variables["[calc]"] = treeCalculators[i]->getName(); variables["[distance]"] = thisLookup[0]->getLabel(); variables["[tag]"] = "ave"; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //creates tree from similarity matrix and write out file Tree* newTree = 
createTree(matrix); if (newTree != NULL) { writeTree(outputFile, newTree); } } if (m->debug) { m->mothurOut("[DEBUG]: done averages trees.\n"); } //create all trees for each calc and find their consensus tree for (int i = 0; i < calcDists.size(); i++) { if (m->control_pressed) { break; } //create a new filename //create a new filename map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); variables["[calc]"] = treeCalculators[i]->getName(); variables["[distance]"] = thisLookup[0]->getLabel(); variables["[tag]"] = "all"; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; m->openOutputFile(outputFile, outAll); vector<Tree*> trees; for (int myIter = 0; myIter < iters; myIter++) { if(m->control_pressed) { break; } //initialize matrix vector< vector<double> > matrix; //square matrix to represent the distance matrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) { int row = calcDistsTotals[myIter][i][j].seq1; int column = calcDistsTotals[myIter][i][j].seq2; double dist = calcDistsTotals[myIter][i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; } //creates tree from similarity matrix and write out file Tree* newTree = createTree(matrix); if (newTree != NULL) { newTree->print(outAll); trees.push_back(newTree); } } outAll.close(); if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } } if (m->debug) { m->mothurOut("[DEBUG]: done all trees.\n"); } Consensus consensus; //clear old tree names if any m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups Tree* conTree = consensus.getTree(trees); if (m->debug) { m->mothurOut("[DEBUG]: done cons tree.\n"); } //create a new filename 
variables["[tag]"] = "cons"; string conFile = getOutputFileName("tree",variables); outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; m->openOutputFile(conFile, outTree); if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; } } }else { for (int i = 0; i < calcDists.size(); i++) { if (m->control_pressed) { break; } //initialize matrix vector< vector<double> > matrix; //square matrix to represent the distance matrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } for (int j = 0; j < calcDists[i].size(); j++) { int row = calcDists[i][j].seq1; int column = calcDists[i][j].seq2; double dist = calcDists[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; } //create a new filename map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); variables["[calc]"] = treeCalculators[i]->getName(); variables["[distance]"] = thisLookup[0]->getLabel(); variables["[tag]"] = ""; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //creates tree from similarity matrix and write out file Tree* newTree = createTree(matrix); if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; } } } return 0; } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "process"); exit(1); } }