Esempio n. 1
0
// Builds the tree file(s) for one set of shared abundance vectors.
//
// For each of `iters` iterations the lookup is (optionally) subsampled, the pairwise
// group distances are computed for every tree calculator via driver() -- split across
// `processors` forked children (unix) or threads (windows) when processors != 1 -- and
// the per-iteration distances are collected in calcDistsTotals.
//
// Afterwards:
//   * iters != 1 : an "ave" tree (from the averaged distances), an "all" file holding
//                  one tree per iteration, and a "cons" consensus tree are written for
//                  every calculator.
//   * iters == 1 : a single tree per calculator is written.
//
// Every output file name is appended to outputNames and outputTypes["tree"].
//
// thisLookup : one SharedRAbundVector* per group. The vectors are only read here; when
//              subsampling, the function works on copies that it allocates and frees itself.
// returns    : 0 on every returning path, including the early return taken when
//              m->control_pressed is seen while copying the lookup. Any std::exception is
//              reported through m->errorOut and the program exits with status 1.
int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
	try{
		vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
        vector< vector<seqDist>  > calcDists; calcDists.resize(treeCalculators.size()); 		
        
        for (int thisIter = 0; thisIter < iters; thisIter++) {
            
            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
            
            if (subsample) {
                SubSample sample;
                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
                
                //make copy of lookup so we don't get access violations
                vector<SharedRAbundVector*> newLookup;
                for (int k = 0; k < thisItersLookup.size(); k++) {
                    SharedRAbundVector* temp = new SharedRAbundVector();
                    temp->setLabel(thisItersLookup[k]->getLabel());
                    temp->setGroup(thisItersLookup[k]->getGroup());
                    newLookup.push_back(temp);
                }
                
                //for each bin, copy every group's abundance into the matching new vector
                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                    //cancelled: free the partial copies and bail out
                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                }
                
                tempLabels = sample.getSample(newLookup, subsampleSize);
                //from here on this iteration works on the copies; they are deleted at the end of the iteration
                thisItersLookup = newLookup;
            }
            
            if(processors == 1){
                driver(thisItersLookup, 0, numGroups, calcDists);
            }else{
                int process = 1;
                vector<int> processIDS;
                
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                    pid_t pid = fork();
                    
                    if (pid > 0) {
                        //parent: remember the child so its results can be collected later
                        processIDS.push_back(pid); 
                        process++;
                    }else if (pid == 0){
                        //child: compute the range lines[process] and hand the distances back through a temp file
                        driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);   
                        
                        //the parent rebuilds this name from the pid it got from fork()
                        //NOTE(review): relies on m->mothurGetpid(process) yielding that same pid as text -- confirm
                        string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
                        ofstream outtemp;
                        m->openOutputFile(tempdistFileName, outtemp);
                        
                        //file layout, once per calculator: a count line, then count lines of "seq1 <tab> seq2 <tab> dist"
                        for (int i = 0; i < calcDists.size(); i++) {
                            outtemp << calcDists[i].size() << endl;
                            
                            for (int j = 0; j < calcDists[i].size(); j++) {
                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
                            }
                        }
                        outtemp.close();
                        
                        exit(0);
                    }else { 
                        //fork failed: interrupt the children started so far and quit
                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                        exit(0);
                    }
                }
                
                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);   
                
                //force parent to wait until all the processes are done
                //wait() returns once any child has terminated, so calling it once per child reaps them all;
                //temp is only used as the status out-parameter
                for (int i = 0; i < processIDS.size(); i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }
                
                //merge each child's temp file into calcDists, then remove the file
                for (int i = 0; i < processIDS.size(); i++) {
                    string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) +  ".dist";
                    ifstream intemp;
                    m->openInputFile(tempdistFileName, intemp);
                    
                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = 0;
                        intemp >> size; m->gobble(intemp);
                        
                        for (int j = 0; j < size; j++) {
                            int seq1 = 0;
                            int seq2 = 0;
                            float dist = 1.0;
                            
                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
                            
                            seqDist tempDist(seq1, seq2, dist);
                            calcDists[k].push_back(tempDist);
                        }
                    }
                    intemp.close();
                    m->mothurRemove(tempdistFileName);
                }
#else
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the treeSharedData struct. 
                //Above fork() will clone, so memory is separate, but that's not the case with windows, 
                //Taking advantage of shared memory to pass results vectors.
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                
                vector<treeSharedData*> pDataArray; 
                //NOTE(review): these are variable-length arrays, which are not standard C++ -- confirm the windows toolchain accepts them
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1]; 
                
                //Create processor worker threads.
                for( int i=1; i<processors; i++ ){
                    
                    //make copy of lookup so we don't get access violations
                    vector<SharedRAbundVector*> newLookup;
                    for (int k = 0; k < thisItersLookup.size(); k++) {
                        SharedRAbundVector* temp = new SharedRAbundVector();
                        temp->setLabel(thisItersLookup[k]->getLabel());
                        temp->setGroup(thisItersLookup[k]->getGroup());
                        newLookup.push_back(temp);
                    }
                    
                    //for each bin
                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                    }
                    
                    // Allocate memory for thread data.
                    treeSharedData* tempSum = new treeSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
                    pDataArray.push_back(tempSum);
                    processIDS.push_back(i);
                    
                    hThreadArray[i-1] = CreateThread(NULL, 0, MyTreeSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
                }
                
                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);   
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                    //a thread that handled fewer groups than it was assigned is treated as a failure
                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
                    }
                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
                    
                    //append the thread's distances to the parent's, calculator by calculator
                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = pDataArray[i]->calcDists[k].size();
                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
                    }
                    
                    CloseHandle(hThreadArray[i]);
                    delete pDataArray[i];
                }
                
#endif
            }
            
            //keep a copy of this iteration's distances
            calcDistsTotals.push_back(calcDists);
            
            if (subsample) {  
                
                //clean up memory: the subsampled copies belong to this iteration only
                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
                thisItersLookup.clear();
                //empty the per-calculator lists (their count stays the same) for the next iteration
                for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
            }
            
            if (m->debug) {  m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
		}
        
		if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }
            
        if (iters != 1) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);  
            
            if (m->debug) {  m->mothurOut("[DEBUG]: found averages.\n"); }
            
            //create average tree for each calc
            for (int i = 0; i < calcDists.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
                
                //fill both triangles so the matrix is symmetric
                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;
                    
                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }
                
                //create a new filename
                map<string, string> variables; 
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "ave";
                string outputFile = getOutputFileName("tree",variables);				
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                
                //creates tree from similarity matrix and write out file
                Tree* newTree = createTree(matrix);
                //free the tree after writing, as the single-iteration branch below does; it was leaked before
                if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }                
            }
            
            if (m->debug) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }
            
            //create all trees for each calc and find their consensus tree
            for (int i = 0; i < calcDists.size(); i++) {
                if (m->control_pressed) { break; }
                
                //create a new filename
                map<string, string> variables; 
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "all";
                string outputFile = getOutputFileName("tree",variables);				
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                
                ofstream outAll;
                m->openOutputFile(outputFile, outAll);
                
                //one tree per iteration for calculator i
                vector<Tree*> trees; 
                for (int myIter = 0; myIter < iters; myIter++) {
                    
                    if(m->control_pressed) { break; }
                    
                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup.size());
                    for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
                    
                    for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) {
                        int row = calcDistsTotals[myIter][i][j].seq1;
                        int column = calcDistsTotals[myIter][i][j].seq2;
                        double dist = calcDistsTotals[myIter][i][j].dist;
                       
                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }
                    
                    //creates tree from similarity matrix and write out file
                    Tree* newTree = createTree(matrix);
                    if (newTree != NULL) { 
                        newTree->print(outAll);
                        trees.push_back(newTree);
                    }
                }
                outAll.close();
                if (m->control_pressed) {
                    //cancelled: free what was built and stop here. Falling through would hand
                    //the freed pointers to consensus.getTree() below.
                    for (int k = 0; k < trees.size(); k++) { delete trees[k]; }
                    trees.clear();
                    break;
                }
                
                if (m->debug) {  m->mothurOut("[DEBUG]: done all trees.\n"); }
                
                Consensus consensus;
                //clear old tree names if any
                m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
                //NOTE(review): the trees in 'trees' are not freed on this path -- confirm whether consensus.getTree() takes ownership
                Tree* conTree = consensus.getTree(trees);
                
                if (m->debug) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }
                
                //create a new filename
                variables["[tag]"] = "cons";
                string conFile = getOutputFileName("tree",variables);				
                outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
                ofstream outTree;
                m->openOutputFile(conFile, outTree);
                
                if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
            }

        }else {
            
            //single iteration: one tree per calculator straight from calcDists
            for (int i = 0; i < calcDists.size(); i++) {
                if (m->control_pressed) { break; }
                
                //initialize matrix
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
                
                for (int j = 0; j < calcDists[i].size(); j++) {
                    int row = calcDists[i][j].seq1;
                    int column = calcDists[i][j].seq2;
                    double dist = calcDists[i][j].dist;
                    
                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }
                
                //create a new filename
                map<string, string> variables; 
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "";
                string outputFile = getOutputFileName("tree",variables);					
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                
                //creates tree from similarity matrix and write out file
                Tree* newTree = createTree(matrix);
                if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
            }
        }
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "process");
		exit(1);
	}
}
Esempio n. 2
0
// Distributes the iters+1 distance computations over `processors` threads, gathers the
// results and writes the tree file(s).
//
// `processors` is capped at iters+1. Each worker thread, and the calling thread, runs
// process() on its own copy of the lookup for its share of the iterations. Afterwards:
//   * iters != 1 : an "ave" tree (from the averaged distances), an "all" file holding
//                  one tree per iteration, and a "cons" consensus tree are written for
//                  every entry of Estimators.
//   * iters == 1 : one tree per matrix returned by the calling thread's bundle.
//
// Every output file name is appended to outputNames and outputTypes["tree"].
//
// thisLookup : shared abundance data; only read here (copied for every thread).
// ct         : count table handed to each Tree that is constructed.
// returns    : 0. Any std::exception is reported through m->errorOut and the program
//              exits with status 1.
int TreeGroupCommand::createProcesses(SharedRAbundVectors*& thisLookup, CountTable& ct){
    try {
        
        vector<string> groupNames = thisLookup->getNamesGroups();
        Treenames = groupNames; //may have changed if subsample eliminated groups
        
        //lines[i] = number of iterations assigned to thread i (0 = calling thread)
        vector<int> lines;
        if (processors > (iters+1)) { processors = iters+1; }
        
        //figure out how many sequences you have to process
        int numItersPerProcessor = (iters+1) / processors;
        for (int i = 0; i < processors; i++) {
            //the last thread takes whatever is left after the even split
            if(i == (processors - 1)){	numItersPerProcessor = (iters+1) - i * numItersPerProcessor; 	}
            lines.push_back(numItersPerProcessor);
        }
        
        //create array of worker threads
        vector<thread*> workerThreads;
        vector<treeSharedData*> data;
        
        //Launch worker threads
        for (int i = 0; i < processors-1; i++) {
            
            //make copy of lookup so we don't get access violations
            SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup);
            //NOTE(review): the second argument is false for workers and true for the calling thread's bundle;
            //presumably it marks the bundle that also produces `matrices` -- confirm against treeSharedData
            treeSharedData* dataBundle = new treeSharedData(lines[i+1], false, subsample, withReplacement, subsampleSize, Estimators, newLookup);
            
            data.push_back(dataBundle);
            workerThreads.push_back(new thread(process, dataBundle));
        }
        
        //make copy of lookup so we don't get access violations
        SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup);
        treeSharedData* dataBundle = new treeSharedData(lines[0], true, subsample, withReplacement, subsampleSize, Estimators, newLookup);
        //the calling thread does its own share while the workers run
        process(dataBundle);
        delete newLookup;
        
        //take over the results of the calling thread's bundle before it is deleted below
        Estimators.clear(); Estimators = dataBundle->Estimators;
        vector< vector< vector<seqDist> > > calcDistsTotals = dataBundle->calcDistsTotals;
        vector< vector< vector<double> > > matrices = dataBundle->matrices;
        
        for (int i = 0; i < processors-1; i++) {
            workerThreads[i]->join();
            
            //get calcDistsTotal info - one entry per iter
            for (int j = 0; j < data[i]->calcDistsTotals.size(); j++) { calcDistsTotals.push_back(data[i]->calcDistsTotals[j]); }
            
            //the worker's lookup copy, its bundle and its thread object are all owned here
            delete data[i]->thisLookup;
            delete data[i];
            delete workerThreads[i];
        }
        delete dataBundle;
        if (iters != 1) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist>  > calcAverages = util.getAverages(calcDistsTotals);
            
            if (m->getDebug()) {  m->mothurOut("[DEBUG]: found averages.\n"); }
            
            //create average tree for each calc
            for (int i = 0; i < Estimators.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup->size());
                for (int k = 0; k < thisLookup->size(); k++) {  matrix[k].resize(thisLookup->size(), 0.0); }
                
                //fill both triangles so the matrix is symmetric
                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;
                    
                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }
                
                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
                variables["[calc]"] = Estimators[i];
                variables["[distance]"] = thisLookup->getLabel();
                variables["[tag]"] = "ave";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
                
                //creates tree from similarity matrix and write out file
                Tree* newTree = new Tree(&ct, matrix, Treenames);
                //on cancellation the tree is dropped without being assembled or written
                if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
                else { newTree->assembleTree(); }
                if (newTree != NULL) { newTree->createNewickFile(outputFile);  delete newTree; }
            }
            
            if (m->getDebug()) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }
            
            //create all trees for each calc and find their consensus tree
            for (int i = 0; i < Estimators.size(); i++) {
                if (m->getControl_pressed()) { break; }
                
                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
                variables["[calc]"] = Estimators[i];
                variables["[distance]"] = thisLookup->getLabel();
                variables["[tag]"] = "all";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
                
                ofstream outAll;
                util.openOutputFile(outputFile, outAll);
                
                //one tree per iteration for estimator i
                vector<Tree*> trees;
                for (int myIter = 0; myIter < iters; myIter++) {
                    
                    if(m->getControl_pressed()) { break; }
                    
                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup->size());
                    for (int k = 0; k < thisLookup->size(); k++) {  matrix[k].resize(thisLookup->size(), 0.0); }
                    
                    for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) {
                        int row = calcDistsTotals[myIter][i][j].seq1;
                        int column = calcDistsTotals[myIter][i][j].seq2;
                        double dist = calcDistsTotals[myIter][i][j].dist;
                        
                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }
                    
                    //creates tree from similarity matrix and write out file
                    Tree* newTree = new Tree(&ct, matrix, Treenames);
                    if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
                    else { newTree->assembleTree(); }
                    if (newTree != NULL) {
                        newTree->print(outAll);
                        trees.push_back(newTree);
                    }
                }
                outAll.close();
                if (m->getControl_pressed()) {
                    //cancelled: free what was built and stop here. Falling through would hand
                    //the freed pointers to consensus.getTree() below.
                    for (int k = 0; k < trees.size(); k++) { delete trees[k]; }
                    trees.clear();
                    break;
                }
                
                if (m->getDebug()) {  m->mothurOut("[DEBUG]: done all trees.\n"); }
                
                Consensus consensus;
                //NOTE(review): the trees in 'trees' are not freed on this path -- confirm whether consensus.getTree() takes ownership
                Tree* conTree = consensus.getTree(trees);
                
                if (m->getDebug()) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }
                
                //create a new filename
                variables["[tag]"] = "cons";
                string conFile = getOutputFileName("tree",variables);
            
                outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
                ofstream outTree;
                util.openOutputFile(conFile, outTree);
                
                if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
            }
        }else {
            //single iteration: one tree per matrix from the calling thread's bundle
            for (int i = 0; i < matrices.size(); i++) {
                if (m->getControl_pressed()) { break; }
                
                //initialize matrix
                vector< vector<double> > matrix = matrices[i]; //square matrix to represent the distance
                
                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
                variables["[calc]"] = Estimators[i];
                variables["[distance]"] = thisLookup->getLabel();
                variables["[tag]"] = "";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
                
                //creates tree from similarity matrix and write out file
                Tree* newTree = new Tree(&ct, matrix, Treenames);
                if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
                else { newTree->assembleTree(); }
                if (newTree != NULL) { newTree->createNewickFile(outputFile);  delete newTree; }
            }
        }
        
        return 0;
        
    }
    catch(exception& e) {
        m->errorOut(e, "TreeGroupCommand", "createProcesses");
        exit(1);
    }
}