Beispiel #1
0
/**
 * Writes "<prefix><suffix>" into buffer.
 *
 * @param buffer    destination character array
 * @param capacity  size of buffer in bytes
 * @param prefix    first part of the name
 * @param suffix    second part of the name
 * @return true when the complete name (including the terminator) fitted,
 *         false when formatting failed or the name had to be truncated.
 */
static bool buildOutputName(char *buffer, size_t capacity, const char *prefix, const char *suffix)
{
    const int needed = snprintf(buffer, capacity, "%s%s", prefix, suffix);
    return needed >= 0 && static_cast<size_t>(needed) < capacity;
}

/**
 * Samples dendrograms of a graph and writes their consensus.
 *
 * Exactly NUM_ARGS arguments are required; they are read as follows:
 *   argv[1]  handed to the Graph constructor
 *   argv[2]  handed to the Dendrogram constructor together with the graph
 *   argv[3]  sampleSpread : number of dendrogram->sample() steps to run
 *   argv[4]  sampleCount  : number of snapshots to take over that run
 *   argv[5]  outputPrefix : prefix of the output file names
 *   argv[6]  threshold    : stored in graph->threshold
 *
 * The consensus is printed to stdout and saved as <prefix>.consensus.txt,
 * <prefix>.consensus.dot and <prefix>.consensus.matrix; each file name is
 * echoed on stderr before it is written.
 *
 * @return 0 on success, 1 when the arguments are unusable.
 */
int main(int argc, char **argv)
{
    if (argc != NUM_ARGS+1) {
        usage();
        // Do not fall through to the argv accesses below if usage() returns.
        return 1;
    }

    srand(time(0));

    int sampleSpread = atoi(argv[3]);
    int sampleCount = atoi(argv[4]);
    if (sampleCount <= 0) {
        // A zero count would divide by zero when the sampling interval is computed.
        cerr << "Sample count must be a positive integer, got \"" << argv[4] << "\"" << endl;
        return 1;
    }

    // More snapshots requested than steps available would give an interval of 0
    // (and a modulo by zero in the loop); snapshot after every step instead.
    int sampleEvery = sampleSpread / sampleCount;
    if (sampleEvery < 1) {
        sampleEvery = 1;
    }

    const char *outputPrefix = argv[5];

    // Check the longest output name before doing any work, so an over-long
    // prefix is rejected up front instead of overflowing the name buffer later.
    char filename[FILENAME_MAX];
    if (!buildOutputName(filename, sizeof(filename), outputPrefix, ".consensus.matrix")) {
        cerr << "Output prefix is too long: " << outputPrefix << endl;
        return 1;
    }

    Graph *graph = new Graph(argv[1]);
    Dendrogram *dendrogram = new Dendrogram(graph,argv[2]);
    double threshold = atof(argv[6]);
    graph->threshold = threshold;

    DendrogramSet samples;

    for (int sampleIndex = 0; sampleIndex < sampleSpread; sampleIndex++) {
        dendrogram->sample();

        if (sampleIndex % sampleEvery == 0) {
            cout << "Sampling at " << sampleIndex << " / " << sampleSpread << endl;

            // Keep a copy of the current state; the working dendrogram keeps changing.
            samples.insert(new Dendrogram(dendrogram));
        }
    }

    Consensus *consensus = new Consensus(samples,graph);

    // Print to console
    cout << consensus->toString() << endl;

    // Save (in various forms) to disk
    buildOutputName(filename, sizeof(filename), outputPrefix, ".consensus.txt");
    cerr << filename << endl;
    ofstream stringFile(filename);
    stringFile << consensus->toString() << endl;
    stringFile.close();

    buildOutputName(filename, sizeof(filename), outputPrefix, ".consensus.dot");
    cerr << filename << endl;
    ofstream dotFile(filename);
    dotFile << consensus->toDot() << endl;
    dotFile.close();

    buildOutputName(filename, sizeof(filename), outputPrefix, ".consensus.matrix");
    cerr << filename << endl;
    ofstream matrixFile(filename);
    matrixFile << consensus->toMatrix() << endl;
    matrixFile.close();

    return 0;
}
Beispiel #2
0
/**
 * Computes the group distances for one lookup and writes the tree files.
 *
 * For each of `iters` iterations the lookup is (when `subsample` is set)
 * replaced by a deep copy that is run through SubSample::getSample, and the
 * distances are computed by driver():
 *   - in this process when `processors == 1`;
 *   - otherwise in fork()ed children that hand their results back through
 *     temporary ".dist" files (Unix-like builds), or in Windows threads that
 *     each work on their own copy of the lookup.
 * The distances of every iteration are collected in calcDistsTotals.
 *
 * Afterwards, when `iters != 1`, three files are produced per calculator: an
 * "ave" tree built from m->getAverages(), an "all" file containing the tree of
 * every iteration, and a "cons" consensus tree. With a single iteration one
 * tree per calculator is written.
 *
 * @param thisLookup  one SharedRAbundVector per group; element 0 supplies the
 *                    label used in the output file names.
 * @return 0 on every return path; a caught exception is reported through
 *         m->errorOut and ends the program with exit(1).
 */
int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
    try{
        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
        vector< vector<seqDist>  > calcDists; calcDists.resize(treeCalculators.size());

        for (int thisIter = 0; thisIter < iters; thisIter++) {

            vector<SharedRAbundVector*> thisItersLookup = thisLookup;

            if (subsample) {
                SubSample sample;
                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds

                //make copy of lookup so we don't get access violations
                vector<SharedRAbundVector*> newLookup;
                for (int k = 0; k < thisItersLookup.size(); k++) {
                    SharedRAbundVector* temp = new SharedRAbundVector();
                    temp->setLabel(thisItersLookup[k]->getLabel());
                    temp->setGroup(thisItersLookup[k]->getGroup());
                    newLookup.push_back(temp);
                }

                //for each bin
                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                }

                tempLabels = sample.getSample(newLookup, subsampleSize);
                //from here on this iteration works on the copy; it is freed after the distances are stored
                thisItersLookup = newLookup;
            }

            if(processors == 1){
                driver(thisItersLookup, 0, numGroups, calcDists);
            }else{
                int process = 1;
                vector<int> processIDS;

#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                    pid_t pid = fork();

                    if (pid > 0) {
                        processIDS.push_back(pid);
                        process++;
                    }else if (pid == 0){
                        //child: compute the assigned range, dump the results to a temp file and exit

                        driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);

                        string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
                        ofstream outtemp;
                        m->openOutputFile(tempdistFileName, outtemp);

                        //per calculator: the number of distances, then one "seq1 seq2 dist" line each
                        for (int i = 0; i < calcDists.size(); i++) {
                            outtemp << calcDists[i].size() << endl;

                            for (int j = 0; j < calcDists[i].size(); j++) {
                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
                            }
                        }
                        outtemp.close();

                        exit(0);
                    }else {
                        //fork failed: interrupt the children started so far and give up
                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                        exit(0);
                    }
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //force parent to wait until all the processes are done
                for (int i = 0; i < processIDS.size(); i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }

                //read each child's temp file back (same layout as written above) and remove it
                for (int i = 0; i < processIDS.size(); i++) {
                    string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) +  ".dist";
                    ifstream intemp;
                    m->openInputFile(tempdistFileName, intemp);

                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = 0;
                        intemp >> size; m->gobble(intemp);

                        for (int j = 0; j < size; j++) {
                            int seq1 = 0;
                            int seq2 = 0;
                            float dist = 1.0;

                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);

                            seqDist tempDist(seq1, seq2, dist);
                            calcDists[k].push_back(tempDist);
                        }
                    }
                    intemp.close();
                    m->mothurRemove(tempdistFileName);
                }
#else
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the treeSharedData struct.
                //Above fork() will clone, so memory is separate, but that's not the case with windows,
                //Taking advantage of shared memory to pass results vectors.
                //////////////////////////////////////////////////////////////////////////////////////////////////////

                vector<treeSharedData*> pDataArray;
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1];

                //Create processor worker threads.
                for( int i=1; i<processors; i++ ){

                    //make copy of lookup so we don't get access violations
                    vector<SharedRAbundVector*> newLookup;
                    for (int k = 0; k < thisItersLookup.size(); k++) {
                        SharedRAbundVector* temp = new SharedRAbundVector();
                        temp->setLabel(thisItersLookup[k]->getLabel());
                        temp->setGroup(thisItersLookup[k]->getGroup());
                        newLookup.push_back(temp);
                    }

                    //for each bin
                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                    }

                    // Allocate memory for thread data.
                    treeSharedData* tempSum = new treeSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
                    pDataArray.push_back(tempSum);
                    processIDS.push_back(i);

                    hThreadArray[i-1] = CreateThread(NULL, 0, MyTreeSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);

                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
                    }
                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  }

                    //append this thread's distances to the parent's results
                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = pDataArray[i]->calcDists[k].size();
                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
                    }

                    CloseHandle(hThreadArray[i]);
                    delete pDataArray[i];
                }

#endif
            }

            calcDistsTotals.push_back(calcDists);

            if (subsample) {

                //clean up memory
                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
                thisItersLookup.clear();
                for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
            }

            if (m->debug) {  m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
        }

        if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }

        if (iters != 1) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);

            if (m->debug) {  m->mothurOut("[DEBUG]: found averages.\n"); }

            //create average tree for each calc
            for (int i = 0; i < calcDists.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                //fill both halves so the matrix is symmetric
                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "ave";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                //creates tree from similarity matrix and write out file
                Tree* newTree = createTree(matrix);
                //free the tree once written, as the single-iteration branch below does
                if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
            }

            if (m->debug) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }

            //create all trees for each calc and find their consensus tree
            for (int i = 0; i < calcDists.size(); i++) {
                if (m->control_pressed) { break; }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "all";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                ofstream outAll;
                m->openOutputFile(outputFile, outAll);

                vector<Tree*> trees;
                for (int myIter = 0; myIter < iters; myIter++) {

                    if(m->control_pressed) { break; }

                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup.size());
                    for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                    for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) {
                        int row = calcDistsTotals[myIter][i][j].seq1;
                        int column = calcDistsTotals[myIter][i][j].seq2;
                        double dist = calcDistsTotals[myIter][i][j].dist;

                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }

                    //creates tree from similarity matrix and write out file
                    Tree* newTree = createTree(matrix);
                    if (newTree != NULL) {
                        newTree->print(outAll);
                        trees.push_back(newTree);
                    }
                }
                outAll.close();
                if (m->control_pressed) {
                    //the run was cancelled: free the trees and stop here, so the
                    //freed pointers are never handed to the consensus builder below
                    for (int k = 0; k < trees.size(); k++) { delete trees[k]; }
                    break;
                }

                if (m->debug) {  m->mothurOut("[DEBUG]: done all trees.\n"); }

                Consensus consensus;
                //clear old tree names if any
                m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
                //NOTE(review): the trees are not freed after this call - confirm whether Consensus takes ownership
                Tree* conTree = consensus.getTree(trees);

                if (m->debug) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }

                //create a new filename
                variables["[tag]"] = "cons";
                string conFile = getOutputFileName("tree",variables);
                outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
                ofstream outTree;
                m->openOutputFile(conFile, outTree);

                if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
            }

        }else {

            //single iteration: one tree per calculator straight from calcDists
            for (int i = 0; i < calcDists.size(); i++) {
                if (m->control_pressed) { break; }

                //initialize matrix
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                for (int j = 0; j < calcDists[i].size(); j++) {
                    int row = calcDists[i][j].seq1;
                    int column = calcDists[i][j].seq2;
                    double dist = calcDists[i][j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                //creates tree from similarity matrix and write out file
                Tree* newTree = createTree(matrix);
                if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
            }
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "TreeGroupCommand", "process");
        exit(1);
    }
}
Beispiel #3
0
// Command-line driver (its usage text names it "readpb").
//
// Phase 1 parses the options into the local flags declared below; any parse
// error, or a missing chain name, prints the usage text and exits with status 1.
// Phase 2 optionally loads "<chain>.param", then - unless one of the analysis
// flags tested further down is set or the topology is fixed - builds a
// bipartition list and a consensus tree and writes them to "<sample>.bplist"
// and "<sample>.con.tre".
// Phase 3 creates a Sample for the chain and runs exactly one analysis on it,
// chosen by the first matching flag in the if / else-if chain at the end.
int main(int argc, char* argv[])	{

	// initialise random 
	// Random::Random();

	string SampleName;
	string ChainName;
	string OutGroupFile;

	string Path = "";

	// Option state: each initial value below is what is used when the
	// corresponding command-line flag is absent.
	int catgtrpoint = 0;

	int rates = 0;
	int modes = 0;
	int sitestat = 0;
	int grepmode = 0;
	int coaff = 0;

	int verbose = 0;

	int extract = 0;
	int burnin = 0;
	int every = 1;
	int until = -1;
	int ncat = 20;

	int bf = 0;

	int rr = 0;

	int step = 10;
	int save = 0;

	int truelength = 0;
	int sitebranch = 0;

	int clock = 0;
	int ps = 0;

	int clustermodes = 0;

 	double mindist = 0.03;
 	int minsize = 10;

	double cutoff = 0.5;

	int nrep = 1;

	int popeff = 0;

	int branchfreqs = 0;

	int benchmark = 0;

	int intlnl = 0;

	double entropytimemax = 0;
	int entropyN = 0;

	int constcorrect = 0;
	double rho = 1;

	int sitelogl = 0;

	// read arguments

	try	{

		// No arguments at all: fall into the usage handler.
		if (argc == 1)	{
			throw(0);
		}

		int i = 1;
		while (i < argc)	{
			string s = argv[i];

			if (s == "-c")	{
				i++;
				if (i == argc) throw(0);
				if (! IsFloat(argv[i])) throw(0);
				cutoff = atof(argv[i]);
			}
			else if (s == "-v")	{
				verbose = 2;
			}
			else if (s == "-rr")	{
				rr = 1;
			}
			else if (s == "-lnl")	{
				intlnl = 1;
			}
			else if (s == "-sitelogl")	{
				sitelogl = 1;
			}
			else if (s == "-cst")	{
				constcorrect = 1;
				i++;
				// NOTE(review): unlike "-c", there is no `i == argc` check here, so a
				// trailing "-cst" reads argv[argc].
				rho = atof(argv[i]);
			}
			else if (s == "-finitetime")	{
				// NOTE(review): neither of the two values is checked against argc.
				i++;
				entropytimemax = atof(argv[i]);
				i++;
				entropyN = atoi(argv[i]);
			}
			else if (s == "-catcoaff")	{
				coaff = 1;
			}
			else if (s == "-ratecoaff")	{
				coaff = 2;
			}
			else if (s == "-mblcoaff")	{
				coaff = 3;
			}
			else if (s == "-popeff")	{
				popeff = 1;
			}
			else if (s == "-branchfreqs")	{
				branchfreqs = 1;
			}
			else if (s == "-step")	{
				i++;
				// NOTE(review): no `i == argc` check before reading the value.
				step = atoi(argv[i]);
			}
			else if (s == "-div")	{
				clock = 1;
			}
			else if (s == "-p")	{
				i++;
				// NOTE(review): no `i == argc` check before reading the value.
				Path = argv[i];
			}
			else if (s == "-ps")	{
				ps = 1;
			}
			else if (s == "-bf")	{
				bf = 1;
			}
			else if (s == "-l")	{
				truelength = 1;
			}
			else if (s == "-ll")	{
				truelength = 1;
				sitebranch = 1;
			}
			else if (s == "-gm")	{
				grepmode = 1;
			}
			else if (s == "-bench")	{
				benchmark = 1;
			}
			else if (s == "-nrep")	{
				i++;
				if (i == argc) throw(0);
				s = argv[i];
				if (! IsInt(s))	{
					throw(0);
				}
				nrep = atoi(argv[i]);
			}
			else if (s == "-s")	{
				save = 1;
			}
			else if (s == "-cl")	{
				clustermodes = 1;
			}
			else if ((s == "-sz") || (s == "-ms"))	{
				i++;
				if (i == argc) throw(0);
				s = argv[i];
				if (! IsInt(s))	{
					throw(0);
				}
				minsize = atoi(argv[i]);
			}
			else if ((s == "-ds") || (s == "-md"))	{
				i++;
				if (i == argc) throw(0);
				s = argv[i];
				if (! IsFloat(s))	{
					throw(0);
				}
				mindist = atof(argv[i]);
			}
			else if (s == "-ncat")	{
				i++;
				if (i == argc) throw(0);
				s = argv[i];
				if (! IsInt(s))	{
					throw(0);
				}
				ncat = atoi(argv[i]);
			}
			else if ( (s == "-m") || (s == "-modes") )	{
				modes = 1;
			}
			else if ( (s == "-r") || (s == "-rates") )	{
				rates = 1;
			}
			else if ( (s == "-ss") || (s == "-sitestat") )	{
				sitestat = 1;
			}
			else if (s == "-catgtrpoint")	{
				catgtrpoint = 1;
			}
			else if ( (s == "-x") || (s == "-extract") )	{
				// -x <burnin> [<every> [<until>]]: burnin is mandatory and must be an
				// integer; every and until are taken only while the next token is an
				// integer. A non-integer token is pushed back (i--) so the main loop
				// parses it as the next argument.
				extract = 1;
				i++;
				if (i == argc) throw(0);
				s = argv[i];
				if (! IsInt(s))	{
					throw(0);
				}
				burnin = atoi(argv[i]);
				i++;
				if (i == argc) throw(0);
				s = argv[i];
				if (IsInt(s))	{
					every = atoi(argv[i]);
					i++;
					if (i == argc) throw(0);
					s = argv[i];
					if (IsInt(s))	{
						until = atoi(argv[i]);
					}
					else	{
						i--;
					}
				}
				else	{
					i--;
				}
			}
			else	{
				// Anything that is not a known flag is accepted only as the very
				// last argument, where it is taken as the chain name.
				if (i != (argc -1))	{
					throw(0);
				}
				ChainName = argv[i];
			}
			i++;
		}
		// SampleName is not assigned by any option above, so this check fails
		// exactly when no chain name was given.
		if ((SampleName == "") && (ChainName == ""))	{
			throw(0);
		}
	}
	catch(...)	{
		// Any parse failure: print the usage text and stop with status 1.
		cerr << "readpb [-x <burnin> <every> <until>] <chainname> \n";
		cerr << '\n';
		cerr << "\tdefaults : burnin = 0, every = 1, until the end\n";
		cerr << '\n';
		cerr << "additional options:\n";
		cerr << "\t-c <cutoff> : collapses all groups with posterior probability lower than cutoff\n"; 
		cerr << "\t-m          : posterior distribution of the number of modes\n";
		cerr << "\t-ss         : mean posterior site-specific stationaries\n";
		cerr << "\t-r          : mean posterior site-specific rates (continuous gamma only)\n";
		cerr << '\n';
		cerr << "\t-ncat <n>   : defines number of bins for rate histogram (default 20)\n";
		cerr << '\n';
		cerr << "\t-cl         : mode clustering\n";
		cerr << "\t\t-ms: cluster min size (default : 10)\n";
		cerr << "\t\t-md: aggregating distance threshold (default : 0.03)\n";
		cerr << '\n';
		cerr << "\t-ps         : postscript output for tree (requires LateX), or for site-specific profiles\n";
		cerr << '\n';
		exit(1);
	}

	// Output files are named after the chain unless a sample name is set.
	if (SampleName == "")	{
		SampleName = ChainName + "_sample";
	}
	try	{

		// Load "<chain>.param" when that file can be opened; mParam2 stays null otherwise.
		string name = ChainName + ".param";
		MCParameters* mParam2 = 0;
		if (ifstream(name.c_str()))	{
			mParam2 = new MCParameters;
			ifstream param_is(name.c_str());
			param_is >> *mParam2;
		}
		
		// A chain whose parameters have NormalApprox set is handled like "-div".
		if (mParam2 && mParam2->NormalApprox)	{
			clock = 1;
		}


		// Consensus-tree path: taken only when none of the flags tested here is
		// set and the parameters (if loaded) do not have FixTopo set.
		if ((! catgtrpoint) && (! truelength) && (! sitestat) && (!modes) && (!rates) && (! clock) && (! clustermodes) && ((!mParam2) || (! mParam2->FixTopo)) )	{

			// Prefer "<chain>.treelist"; fall back to the chain name itself as a file.
			string name = ChainName + ".treelist";
			if (! ifstream(name.c_str()))	{
				name = ChainName;
				if (! ifstream(name.c_str()))	{
					cerr << "error: non existing chain\n";
					exit(1);
				}
			}
			BipartitionList bplist(name,burnin,every,until);
			if (!bplist.Ntree)	{
				cerr << "empty tree list\n";
				exit(1);
			}
			ofstream bplist_os((SampleName + ".bplist").c_str());
			bplist.WriteToStream(bplist_os);
			bplist_os.close();
			cout << bplist.Ntree << " trees were read\n";
			cout.flush();
			Consensus* cons = new Consensus(&bplist,cutoff);
			ofstream cons_os((SampleName + ".con.tre").c_str());
			// cons->Trichotomise();
			cons->Phylip(cons_os, 1, 1, 1, 0);
			cons_os.close();
			cerr << '\n';
			cerr << "bipartition list in\t" << SampleName << ".bplist\n";
			cerr << "consensus in\t\t" << SampleName << ".con.tre\n";
			if (ps)	{
				cons->ToPS((string) (SampleName + ".con.tre"),12,20,1,1,1,0);
				cerr << "postscript in\t\t" << SampleName << ".con.tre.ps\n";
			}
			cerr << '\n';
			// Without parameters, or when SaveAll is not set, the run ends here.
			// NOTE(review): this exits with status 1 although the consensus files
			// were written - confirm whether 0 was intended.
			if ((! mParam2) || (!mParam2->SaveAll))	{
				exit(1);
			}
		}
		// delete mParam2;

		Sample* sample = new Sample(ChainName,burnin,every,until,Path);
		if (save)	{
			sample->ToFile(SampleName);
		}
		MCParameters* mParam = sample->GetParameters();
		// initialising

		int Nsite = mParam->Nsite;
		int Ntaxa = mParam->Ntaxa;

		cout << '\n';
		cout << "Nsite : " << Nsite << '\n';
		cout << "Ntaxa : " << Ntaxa << '\n';
		cout << sample->GetSize() << " points to read\n";
		cout << '\n';
		cout.flush();


		// These four analyses are refused when NormalApprox is set.
		if (mParam->NormalApprox)	{
			if (clustermodes)	{
				cerr << "no mode clustering under normal approx\n";
				cerr << '\n';
				exit(1);
			}
			if (modes)	{
				cerr << "no mode analysis under normal approx\n";
				cerr << '\n';
				exit(1);
			}
			if (rates)	{
				cerr << "no rate analysis under normal approx\n";
				cerr << '\n';
				exit(1);
			}
			if (sitestat)	{
				cerr << "no site-specific profile analysis under normal approx\n";
				cerr << '\n';
				exit(1);
			}
		}
		// Exactly one analysis runs; when several flags are given, the order of
		// this if / else-if chain decides which one wins.
		if (clock)	{
			sample->Dating(ps);
			sample->Reset();
		}
		else if (sitelogl)	{
			sample->ReadSiteLogLikelihood();
		}
		else if (intlnl)	{
			sample->ReadSummedLogLikelihood();
		}
		else if (entropyN)	{
			sample->ReadFiniteTimeEntropy(entropytimemax,entropyN);
		}
		else if (catgtrpoint)	{
			sample->ReadCATGTRPoint();
		}
		else if (popeff)	{
			sample->ReadPopEff(ps);
		}
		else if (branchfreqs)	{
			sample->ReadBranchFreqs();
		}
		else if (coaff)	{
			sample->Coaff(coaff,step);
		}	
		else if (rr)	{
			sample->MeanRR();
		}
		else if (benchmark)	{
			sample->ReadBench();
		}
		else if (clustermodes)	{
			if (! mParam->SaveAll)	{
				cerr << "error : biochemical profiles were not saved. cannot cluster them\n";
				exit(1);
			}
			sample->ClusterModes(minsize, mindist, 0);
		}
		else if (grepmode)	{
			sample->GrepModes();
		}
		else if (truelength)	{
			sample->TrueLength(nrep,sitebranch);
		}
		else if (constcorrect)	{
			sample->ConstantSiteCorrection(rho);
		}
		else {
			// Default analysis when no specific flag selected another one.
			sample->Read(rates, modes, sitestat, cutoff, ncat, ps);
		}
	}
	// NOTE(review): the listing stops here - the catch clause for the try block
	// above and the end of main are not part of this excerpt.
Beispiel #4
0
/**
 * Spreads the iters+1 distance iterations over `processors` threads, merges
 * their results and writes the tree files.
 *
 * Every thread (including the calling one) runs process() on its own copy of
 * the lookup, bundled in a treeSharedData. The calling thread's bundle supplies
 * Estimators, the first calcDistsTotals entries and the matrices; the worker
 * bundles are appended after each join.
 *
 * When `iters != 1`, three files are produced per estimator: an "ave" tree
 * built from util.getAverages(), an "all" file containing the tree of every
 * iteration, and a "cons" consensus tree. Otherwise one tree per entry of
 * `matrices` is written.
 *
 * Side effects: may lower or raise the member `processors` (see below),
 * replaces `Treenames` and `Estimators`, and appends to outputNames/outputTypes.
 *
 * @param thisLookup  abundance data; copied once per thread, and its label and
 *                    size are used when building the trees.
 * @param ct          count table handed to every Tree that is constructed.
 * @return 0; a caught exception is reported through m->errorOut and ends the
 *         program with exit(1).
 */
int TreeGroupCommand::createProcesses(SharedRAbundVectors*& thisLookup, CountTable& ct){
    try {

        vector<string> groupNames = thisLookup->getNamesGroups();
        Treenames = groupNames; //may have changed if subsample eliminated groups

        vector<int> lines;
        if (processors > (iters+1)) { processors = iters+1; }
        //guard the division below against a zero or negative processor count
        if (processors < 1) { processors = 1; }

        //figure out how many sequences you have to process
        int numItersPerProcessor = (iters+1) / processors;
        for (int i = 0; i < processors; i++) {
            //the last processor takes whatever the integer division left over
            if(i == (processors - 1)){	numItersPerProcessor = (iters+1) - i * numItersPerProcessor; 	}
            lines.push_back(numItersPerProcessor);
        }

        //create array of worker threads
        vector<thread*> workerThreads;
        vector<treeSharedData*> data;

        //Launch worker threads
        for (int i = 0; i < processors-1; i++) {

            //make copy of lookup so we don't get access violations
            SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup);
            treeSharedData* dataBundle = new treeSharedData(lines[i+1], false, subsample, withReplacement, subsampleSize, Estimators, newLookup);

            data.push_back(dataBundle);
            workerThreads.push_back(new thread(process, dataBundle));
        }

        //the calling thread handles the first share itself
        //make copy of lookup so we don't get access violations
        SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup);
        treeSharedData* dataBundle = new treeSharedData(lines[0], true, subsample, withReplacement, subsampleSize, Estimators, newLookup);
        process(dataBundle);
        delete newLookup;

        Estimators.clear(); Estimators = dataBundle->Estimators;
        vector< vector< vector<seqDist> > > calcDistsTotals = dataBundle->calcDistsTotals;
        vector< vector< vector<double> > > matrices = dataBundle->matrices;

        for (int i = 0; i < processors-1; i++) {
            workerThreads[i]->join();

            //get calcDistsTotal info - one entry per iter
            for (int j = 0; j < data[i]->calcDistsTotals.size(); j++) { calcDistsTotals.push_back(data[i]->calcDistsTotals[j]); }

            delete data[i]->thisLookup;
            delete data[i];
            delete workerThreads[i];
        }
        delete dataBundle;
        if (iters != 1) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist>  > calcAverages = util.getAverages(calcDistsTotals);

            if (m->getDebug()) {  m->mothurOut("[DEBUG]: found averages.\n"); }

            //create average tree for each calc
            for (int i = 0; i < Estimators.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup->size());
                for (int k = 0; k < thisLookup->size(); k++) {  matrix[k].resize(thisLookup->size(), 0.0); }

                //fill both halves so the matrix is symmetric
                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
                variables["[calc]"] = Estimators[i];
                variables["[distance]"] = thisLookup->getLabel();
                variables["[tag]"] = "ave";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                //creates tree from similarity matrix and write out file
                Tree* newTree = new Tree(&ct, matrix, Treenames);
                if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
                else { newTree->assembleTree(); }
                if (newTree != NULL) { newTree->createNewickFile(outputFile);  delete newTree; }
            }

            if (m->getDebug()) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }

            //create all trees for each calc and find their consensus tree
            for (int i = 0; i < Estimators.size(); i++) {
                if (m->getControl_pressed()) { break; }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
                variables["[calc]"] = Estimators[i];
                variables["[distance]"] = thisLookup->getLabel();
                variables["[tag]"] = "all";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                ofstream outAll;
                util.openOutputFile(outputFile, outAll);

                vector<Tree*> trees;
                for (int myIter = 0; myIter < iters; myIter++) {

                    if(m->getControl_pressed()) { break; }

                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup->size());
                    for (int k = 0; k < thisLookup->size(); k++) {  matrix[k].resize(thisLookup->size(), 0.0); }

                    for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) {
                        int row = calcDistsTotals[myIter][i][j].seq1;
                        int column = calcDistsTotals[myIter][i][j].seq2;
                        double dist = calcDistsTotals[myIter][i][j].dist;

                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }

                    //creates tree from similarity matrix and write out file
                    Tree* newTree = new Tree(&ct, matrix, Treenames);
                    if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
                    else { newTree->assembleTree(); }
                    if (newTree != NULL) {
                        newTree->print(outAll);
                        trees.push_back(newTree);
                    }
                }
                outAll.close();
                if (m->getControl_pressed()) {
                    //the run was cancelled: free the trees and stop here, so the
                    //freed pointers are never handed to the consensus builder below
                    for (int k = 0; k < trees.size(); k++) { delete trees[k]; }
                    break;
                }

                if (m->getDebug()) {  m->mothurOut("[DEBUG]: done all trees.\n"); }

                Consensus consensus;
                //NOTE(review): the trees are not freed after this call - confirm whether Consensus takes ownership
                Tree* conTree = consensus.getTree(trees);

                if (m->getDebug()) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }

                //create a new filename
                variables["[tag]"] = "cons";
                string conFile = getOutputFileName("tree",variables);

                outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
                ofstream outTree;
                util.openOutputFile(conFile, outTree);

                if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
            }
        }else {
            //single iteration: one tree per matrix delivered by the calling thread's bundle
            for (int i = 0; i < matrices.size(); i++) {
                if (m->getControl_pressed()) { break; }

                //initialize matrix
                vector< vector<double> > matrix = matrices[i]; //square matrix to represent the distance

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
                variables["[calc]"] = Estimators[i];
                variables["[distance]"] = thisLookup->getLabel();
                variables["[tag]"] = "";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                //creates tree from similarity matrix and write out file
                Tree* newTree = new Tree(&ct, matrix, Treenames);
                if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
                else { newTree->assembleTree(); }
                if (newTree != NULL) { newTree->createNewickFile(outputFile);  delete newTree; }
            }
        }

        return 0;

    }
    catch(exception& e) {
        m->errorOut(e, "TreeGroupCommand", "createProcesses");
        exit(1);
    }
}