Esempio n. 1
0
/**
 * Produces the phylip distance matrices for one set of shared abundance vectors.
 *
 * The loop runs iters+1 times:
 *   - iteration 0 calls driver() on thisLookup as passed in and writes one
 *     "phylip" output file per entry of matrixCalculators;
 *   - when subsample is set, iterations 1..iters each call driver() on a
 *     subsampled deep copy of thisLookup and store the resulting distances in
 *     calcDistsTotals instead of printing them.
 * Afterwards, if iters != 0, m->getAverages() / m->getStandardDeviation() are
 * applied to calcDistsTotals and an "ave" and a "std" phylip file are written
 * for every calculator.
 *
 * When processors > 1 the work is divided according to lines[]: on unix-like
 * systems by fork()ing children that hand their results back through temporary
 * ".dist" files, otherwise by Windows threads that return their results through
 * distSharedData structs.
 *
 * @param thisLookup  one SharedRAbundVector per group. The vectors it points to
 *                    are never deleted here; only the subsampled copies are.
 * @return 0, including when m->control_pressed aborts the work early. If a
 *         std::exception is caught it is reported through m->errorOut and the
 *         process exits with status 1.
 */
int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
    try {
        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
        vector< vector<seqDist>  > calcDists; calcDists.resize(matrixCalculators.size());

        for (int thisIter = 0; thisIter < iters+1; thisIter++) {
            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
            variables["[distance]"] = thisLookup[0]->getLabel();
            variables["[tag2]"] = "";

            //iteration 0 (and every iteration when not subsampling) works on the caller's vectors
            vector<SharedRAbundVector*> thisItersLookup = thisLookup;

            if (subsample && (thisIter != 0)) {
                SubSample sample;
                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds

                //make copy of lookup so we don't get access violations
                vector<SharedRAbundVector*> newLookup;
                for (int k = 0; k < thisItersLookup.size(); k++) {
                    SharedRAbundVector* temp = new SharedRAbundVector();
                    temp->setLabel(thisItersLookup[k]->getLabel());
                    temp->setGroup(thisItersLookup[k]->getGroup());
                    newLookup.push_back(temp);
                }

                //for each bin
                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                    //on abort, free the partially built copy before leaving
                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                }

                tempLabels = sample.getSample(newLookup, subsampleSize);
                thisItersLookup = newLookup;  //these copies are deleted at the end of this iteration
            }

            if(processors == 1){
                driver(thisItersLookup, 0, numGroups, calcDists);
            }else{
                int process = 1;
                vector<int> processIDS;

                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                    pid_t pid = fork();

                    if (pid > 0) {
                        //parent: remember the child so its temp file can be read back later
                        processIDS.push_back(pid);
                        process++;
                    }else if (pid == 0){
                        //child: compute its slice, dump the results to a temp file, then exit
                        driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);

                        string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
                        ofstream outtemp;
                        m->openOutputFile(tempdistFileName, outtemp);

                        //file layout, repeated once per calculator: a count line, then "seq1 \t seq2 \t dist" lines.
                        //'\n' rather than endl: the stream is closed right below, so flushing every line is wasted work
                        for (int i = 0; i < calcDists.size(); i++) {
                            outtemp << calcDists[i].size() << '\n';

                            for (int j = 0; j < calcDists[i].size(); j++) {
                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << '\n';
                            }
                        }
                        outtemp.close();

                        exit(0);
                    }else {
                        //fork failed: stop the children already started and give up
                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                        exit(0);
                    }
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //force parent to wait until all the processes are done
                //wait() reaps one (any) child per call, so one call per child is made; temp only receives the exit status
                for (int i = 0; i < processIDS.size(); i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }

                //append every child's results to the parent's calcDists and remove the temp files
                for (int i = 0; i < processIDS.size(); i++) {
                    string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) +  ".dist";
                    ifstream intemp;
                    m->openInputFile(tempdistFileName, intemp);

                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = 0;
                        intemp >> size; m->gobble(intemp);

                        for (int j = 0; j < size; j++) {
                            int seq1 = 0;
                            int seq2 = 0;
                            float dist = 1.0;

                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);

                            seqDist tempDist(seq1, seq2, dist);
                            calcDists[k].push_back(tempDist);
                        }
                    }
                    intemp.close();
                    m->mothurRemove(tempdistFileName);
                }
                #else
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the distSharedData struct.
                //Above fork() will clone, so memory is separate, but that's not the case with windows,
                //Taking advantage of shared memory to pass results vectors.
                //////////////////////////////////////////////////////////////////////////////////////////////////////

                vector<distSharedData*> pDataArray;
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1];

                //Create processor worker threads.
                for( int i=1; i<processors; i++ ){

                    //make copy of lookup so we don't get access violations
                    vector<SharedRAbundVector*> newLookup;
                    for (int k = 0; k < thisItersLookup.size(); k++) {
                        SharedRAbundVector* temp = new SharedRAbundVector();
                        temp->setLabel(thisItersLookup[k]->getLabel());
                        temp->setGroup(thisItersLookup[k]->getGroup());
                        newLookup.push_back(temp);
                    }

                    //for each bin
                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                    }

                    // Allocate memory for thread data.
                    distSharedData* tempSum = new distSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
                    pDataArray.push_back(tempSum);
                    processIDS.push_back(i);

                    hThreadArray[i-1] = CreateThread(NULL, 0, MyDistSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);

                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                    //a thread that handled fewer groups than assigned is reported and flags an abort
                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
                    }
                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  }

                    //append this thread's results to the parent's calcDists
                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = pDataArray[i]->calcDists[k].size();
                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
                    }

                    CloseHandle(hThreadArray[i]);
                    delete pDataArray[i];
                }

                #endif
            }

            if (subsample && (thisIter != 0)) {
                if((thisIter) % 100 == 0){  m->mothurOutJustToScreen(toString(thisIter)+"\n");      }
                calcDistsTotals.push_back(calcDists);

                //the debug flag is tested once, outside the loops, so a normal run does not walk every distance for nothing
                if (m->debug) {
                    for (int i = 0; i < calcDists.size(); i++) {
                        for (int j = 0; j < calcDists[i].size(); j++) {
                            m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n");
                        }
                    }
                }

                //clean up memory (thisItersLookup holds the subsampled copies here, not the caller's vectors)
                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
                thisItersLookup.clear();
            }else { //print results for whole dataset
                for (int i = 0; i < calcDists.size(); i++) {
                    if (m->control_pressed) { break; }

                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup.size());
                    for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                    //each distance is stored symmetrically
                    for (int j = 0; j < calcDists[i].size(); j++) {
                        int row = calcDists[i][j].seq1;
                        int column = calcDists[i][j].seq2;
                        double dist = calcDists[i][j].dist;

                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }

                    variables["[outputtag]"] = output;
                    variables["[calc]"] = matrixCalculators[i]->getName();
                    string distFileName = getOutputFileName("phylip",variables);
                    outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);

                    ofstream outDist;
                    m->openOutputFile(distFileName, outDist);
                    outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);

                    printSims(outDist, matrix);

                    outDist.close();
                }
            }

            //start the next iteration with empty per-calculator result lists
            for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
        }

        if (iters != 0) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals, mode);

            //find standard deviation
            vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);

            //print results
            for (int i = 0; i < calcDists.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
                stdmatrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }

                //stdDev[i][j] is read with the same index as calcAverages[i][j]
                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;
                    float stdDist = stdDev[i][j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                    stdmatrix[row][column] = stdDist;
                    stdmatrix[column][row] = stdDist;
                }

                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[outputtag]"] = output;
                variables["[tag2]"] = "ave";
                variables["[calc]"] = matrixCalculators[i]->getName();
                string distFileName = getOutputFileName("phylip",variables);
                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
                //set current phylip file to average distance matrix
                m->setPhylipFile(distFileName);
                ofstream outAve;
                m->openOutputFile(distFileName, outAve);
                outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);

                printSims(outAve, matrix);

                outAve.close();

                variables["[tag2]"] = "std";
                distFileName = getOutputFileName("phylip",variables);
                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
                ofstream outSTD;
                m->openOutputFile(distFileName, outSTD);
                outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);

                printSims(outSTD, stdmatrix);

                outSTD.close();

            }
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MatrixOutputCommand", "process");
        exit(1);
    }
}
Esempio n. 2
0
/**
 * Builds the output trees for one set of shared abundance vectors.
 *
 * The loop runs iters times. Each pass calls driver() (on a subsampled deep
 * copy of thisLookup when subsample is set, otherwise on thisLookup itself)
 * and records the resulting distances in calcDistsTotals.
 *
 *   - iters != 1: for every entry of treeCalculators an "ave" tree is written
 *     from m->getAverages(calcDistsTotals), every per-iteration tree is printed
 *     to an "all" file, and a "cons" tree from Consensus::getTree is written.
 *   - iters == 1: a single tree per calculator is written from the one
 *     recorded iteration.
 *
 * When processors > 1 the work is divided according to lines[]: on unix-like
 * systems by fork()ing children that hand their results back through temporary
 * ".dist" files, otherwise by Windows threads that return their results through
 * treeSharedData structs.
 *
 * @param thisLookup  one SharedRAbundVector per group. The vectors it points to
 *                    are never deleted here; only the subsampled copies are.
 * @return 0, including when m->control_pressed aborts the work early. If a
 *         std::exception is caught it is reported through m->errorOut and the
 *         process exits with status 1.
 */
int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
    try{
        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
        vector< vector<seqDist>  > calcDists; calcDists.resize(treeCalculators.size());

        for (int thisIter = 0; thisIter < iters; thisIter++) {

            vector<SharedRAbundVector*> thisItersLookup = thisLookup;

            if (subsample) {
                SubSample sample;
                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds

                //make copy of lookup so we don't get access violations
                vector<SharedRAbundVector*> newLookup;
                for (int k = 0; k < thisItersLookup.size(); k++) {
                    SharedRAbundVector* temp = new SharedRAbundVector();
                    temp->setLabel(thisItersLookup[k]->getLabel());
                    temp->setGroup(thisItersLookup[k]->getGroup());
                    newLookup.push_back(temp);
                }

                //for each bin
                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                    //on abort, free the partially built copy before leaving
                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                }

                tempLabels = sample.getSample(newLookup, subsampleSize);
                thisItersLookup = newLookup;  //these copies are deleted at the end of this iteration
            }

            if(processors == 1){
                driver(thisItersLookup, 0, numGroups, calcDists);
            }else{
                int process = 1;
                vector<int> processIDS;

#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                    pid_t pid = fork();

                    if (pid > 0) {
                        //parent: remember the child so its temp file can be read back later
                        processIDS.push_back(pid);
                        process++;
                    }else if (pid == 0){
                        //child: compute its slice, dump the results to a temp file, then exit
                        driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);

                        string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
                        ofstream outtemp;
                        m->openOutputFile(tempdistFileName, outtemp);

                        //file layout, repeated once per calculator: a count line, then "seq1 \t seq2 \t dist" lines.
                        //'\n' rather than endl: the stream is closed right below, so flushing every line is wasted work
                        for (int i = 0; i < calcDists.size(); i++) {
                            outtemp << calcDists[i].size() << '\n';

                            for (int j = 0; j < calcDists[i].size(); j++) {
                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << '\n';
                            }
                        }
                        outtemp.close();

                        exit(0);
                    }else {
                        //fork failed: stop the children already started and give up
                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                        exit(0);
                    }
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //force parent to wait until all the processes are done
                //wait() reaps one (any) child per call, so one call per child is made; temp only receives the exit status
                for (int i = 0; i < processIDS.size(); i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }

                //append every child's results to the parent's calcDists and remove the temp files
                for (int i = 0; i < processIDS.size(); i++) {
                    string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) +  ".dist";
                    ifstream intemp;
                    m->openInputFile(tempdistFileName, intemp);

                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = 0;
                        intemp >> size; m->gobble(intemp);

                        for (int j = 0; j < size; j++) {
                            int seq1 = 0;
                            int seq2 = 0;
                            float dist = 1.0;

                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);

                            seqDist tempDist(seq1, seq2, dist);
                            calcDists[k].push_back(tempDist);
                        }
                    }
                    intemp.close();
                    m->mothurRemove(tempdistFileName);
                }
#else
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the treeSharedData struct.
                //Above fork() will clone, so memory is separate, but that's not the case with windows,
                //Taking advantage of shared memory to pass results vectors.
                //////////////////////////////////////////////////////////////////////////////////////////////////////

                vector<treeSharedData*> pDataArray;
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1];

                //Create processor worker threads.
                for( int i=1; i<processors; i++ ){

                    //make copy of lookup so we don't get access violations
                    vector<SharedRAbundVector*> newLookup;
                    for (int k = 0; k < thisItersLookup.size(); k++) {
                        SharedRAbundVector* temp = new SharedRAbundVector();
                        temp->setLabel(thisItersLookup[k]->getLabel());
                        temp->setGroup(thisItersLookup[k]->getGroup());
                        newLookup.push_back(temp);
                    }

                    //for each bin
                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
                    }

                    // Allocate memory for thread data.
                    treeSharedData* tempSum = new treeSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
                    pDataArray.push_back(tempSum);
                    processIDS.push_back(i);

                    hThreadArray[i-1] = CreateThread(NULL, 0, MyTreeSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
                }

                //parent do your part
                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);

                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);

                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                    //a thread that handled fewer groups than assigned is reported and flags an abort
                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
                    }
                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  }

                    //append this thread's results to the parent's calcDists
                    for (int k = 0; k < calcDists.size(); k++) {
                        int size = pDataArray[i]->calcDists[k].size();
                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
                    }

                    CloseHandle(hThreadArray[i]);
                    delete pDataArray[i];
                }

#endif
            }

            //keep a copy of this iteration's distances; everything printed below is built from these copies
            calcDistsTotals.push_back(calcDists);

            if (subsample) {

                //clean up memory (thisItersLookup holds the subsampled copies here, not the caller's vectors)
                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
                thisItersLookup.clear();
                for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
            }

            if (m->debug) {  m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
        }

        if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }

        if (iters != 1) {
            //we need to find the average distance and standard deviation for each groups distance
            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);

            if (m->debug) {  m->mothurOut("[DEBUG]: found averages.\n"); }

            //create average tree for each calc
            for (int i = 0; i < calcDists.size(); i++) {
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                for (int j = 0; j < calcAverages[i].size(); j++) {
                    int row = calcAverages[i][j].seq1;
                    int column = calcAverages[i][j].seq2;
                    float dist = calcAverages[i][j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "ave";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                //creates tree from similarity matrix and write out file
                //the tree is released once written, exactly as the single-iteration path below does
                Tree* newTree = createTree(matrix);
                if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
            }

            if (m->debug) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }

            //create all trees for each calc and find their consensus tree
            for (int i = 0; i < calcDists.size(); i++) {
                if (m->control_pressed) { break; }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "all";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                ofstream outAll;
                m->openOutputFile(outputFile, outAll);

                vector<Tree*> trees;
                for (int myIter = 0; myIter < iters; myIter++) {

                    if(m->control_pressed) { break; }

                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisLookup.size());
                    for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                    for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) {
                        int row = calcDistsTotals[myIter][i][j].seq1;
                        int column = calcDistsTotals[myIter][i][j].seq2;
                        double dist = calcDistsTotals[myIter][i][j].dist;

                        matrix[row][column] = dist;
                        matrix[column][row] = dist;
                    }

                    //creates tree from similarity matrix and write out file
                    Tree* newTree = createTree(matrix);
                    if (newTree != NULL) {
                        newTree->print(outAll);
                        trees.push_back(newTree);
                    }
                }
                outAll.close();

                //on abort the trees are freed; stop here so the consensus step is never handed the dangling pointers
                if (m->control_pressed) {
                    for (int k = 0; k < trees.size(); k++) { delete trees[k]; }
                    trees.clear();
                    break;
                }

                if (m->debug) {  m->mothurOut("[DEBUG]: done all trees.\n"); }

                Consensus consensus;
                //clear old tree names if any
                m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
                //NOTE(review): the trees in 'trees' are not deleted after this call; confirm whether Consensus::getTree takes ownership of them
                Tree* conTree = consensus.getTree(trees);

                if (m->debug) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }

                //create a new filename
                variables["[tag]"] = "cons";
                string conFile = getOutputFileName("tree",variables);
                outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
                ofstream outTree;
                m->openOutputFile(conFile, outTree);

                if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
            }

        }else {

            for (int i = 0; i < calcDists.size(); i++) {
                if (m->control_pressed) { break; }

                //read the single recorded iteration from calcDistsTotals: when subsampling, calcDists itself
                //was emptied at the end of the iteration and would give an all-zero matrix.
                //iters == 1 here, so the loop above pushed exactly one entry.
                const vector<seqDist>& iterDists = calcDistsTotals[0][i];

                //initialize matrix
                vector< vector<double> > matrix; //square matrix to represent the distance
                matrix.resize(thisLookup.size());
                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }

                for (int j = 0; j < iterDists.size(); j++) {
                    int row = iterDists[j].seq1;
                    int column = iterDists[j].seq2;
                    double dist = iterDists[j].dist;

                    matrix[row][column] = dist;
                    matrix[column][row] = dist;
                }

                //create a new filename
                map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
                variables["[calc]"] = treeCalculators[i]->getName();
                variables["[distance]"] = thisLookup[0]->getLabel();
                variables["[tag]"] = "";
                string outputFile = getOutputFileName("tree",variables);
                outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);

                //creates tree from similarity matrix and write out file
                Tree* newTree = createTree(matrix);
                if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
            }
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "TreeGroupCommand", "process");
        exit(1);
    }
}
Esempio n. 3
0
//**********************************************************************************************************************
//Runs every summary calculator on sabund for one label.
//  - Iteration 0 uses sabund itself and prints each calculator's output to outputFileHandle.
//  - Iterations 1..iters store the calculator values; when subsample is set they run on a
//    freshly subsampled copy of sabund, and the per-value average ("ave") and standard
//    deviation ("std") over those iterations are written to outAve.
//Returns 0, including when the run is interrupted through m->control_pressed.
int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve) {
    try {

        //calculator -> data -> values
        vector< vector< vector<double> > >  results;
        results.resize(sumCalculators.size());

        outputFileHandle << sabund->getLabel();

        SubSample sample;
        for (int thisIter = 0; thisIter < iters+1; thisIter++) {

            SAbundVector* thisIterSabund = sabund;

            //we want the summary results for the whole dataset, then the subsampling.
            //ownsCopy is true when this iteration works on a heap copy that has to be freed
            //on every path that leaves the iteration.
            const bool ownsCopy = (thisIter > 0) && subsample;
            if (ownsCopy) {
                //copy sabund since getSample destroys it
                RAbundVector rabund = sabund->getRAbundVector();
                SAbundVector* newSabund = new SAbundVector();
                *newSabund = rabund.getSAbundVector();

                sample.getSample(newSabund, subsampleSize);
                thisIterSabund = newSabund;
            }

            for (size_t i = 0; i < sumCalculators.size(); i++) {
                vector<double> data = sumCalculators[i]->getValues(thisIterSabund);

                if (m->control_pressed) {
                    //release the subsampled copy before bailing out, otherwise it leaks
                    if (ownsCopy) { delete thisIterSabund; }
                    return 0;
                }

                if (thisIter == 0) {
                    outputFileHandle << '\t';
                    sumCalculators[i]->print(outputFileHandle);
                } else {
                    //some of the calc have hci and lci need to make room for that
                    if (results[i].size() == 0) {
                        results[i].resize(data.size());
                    }
                    //save results for ave and std.
                    for (size_t j = 0; j < data.size(); j++) {
                        if (m->control_pressed) {
                            //release the subsampled copy before bailing out, otherwise it leaks
                            if (ownsCopy) { delete thisIterSabund; }
                            return 0;
                        }
                        results[i][j].push_back(data[j]);
                    }
                }
            }

            //cleanup memory
            if (ownsCopy) {
                delete thisIterSabund;
            }
        }
        outputFileHandle << endl;

        if (subsample) {
            outAve << sabund->getLabel() << '\t' << "ave\t";
            //find ave and std for this label and output
            //will need to modify the createGroupSummary to combine results and not mess with the .summary file.

            //calcs -> values, sums start at zero
            vector< vector<double> >  calcAverages;
            calcAverages.resize(sumCalculators.size());
            for (size_t i = 0; i < calcAverages.size(); i++) {
                calcAverages[i].resize(results[i].size(), 0);
            }

            //sum each stored value over all iterations
            for (int thisIter = 0; thisIter < iters; thisIter++) {
                for (size_t i = 0; i < calcAverages.size(); i++) {
                    for (size_t j = 0; j < calcAverages[i].size(); j++) {
                        calcAverages[i][j] += results[i][j][thisIter];
                    }
                }
            }

            //turn the sums into averages and print them
            for (size_t i = 0; i < calcAverages.size(); i++) {
                for (size_t j = 0; j < calcAverages[i].size(); j++) {
                    calcAverages[i][j] /= static_cast<float>(iters);
                    outAve << calcAverages[i][j] << '\t';
                }
            }

            //find standard deviation
            vector< vector<double>  > stdDev;
            stdDev.resize(sumCalculators.size());
            for (size_t i = 0; i < stdDev.size(); i++) {
                stdDev[i].resize(results[i].size(), 0);
            }

            //accumulate the squared difference of each value from its mean
            for (int thisIter = 0; thisIter < iters; thisIter++) {
                for (size_t i = 0; i < stdDev.size(); i++) {
                    for (size_t j = 0; j < stdDev[i].size(); j++) {
                        const double diff = results[i][j][thisIter] - calcAverages[i][j];
                        stdDev[i][j] += (diff * diff);
                    }
                }
            }

            outAve << endl << sabund->getLabel() << '\t' << "std\t";
            //divide by the iteration count and take the square root
            for (size_t i = 0; i < stdDev.size(); i++) {
                for (size_t j = 0; j < stdDev[i].size(); j++) {
                    stdDev[i][j] /= static_cast<float>(iters);
                    stdDev[i][j] = sqrt(stdDev[i][j]);
                    outAve << stdDev[i][j] << '\t';
                }
            }
            outAve << endl;
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "SummaryCommand", "process");
        exit(1);
    }
}
Esempio n. 4
0
int process(treeSharedData* params) {
    try{
        
        ValidCalculators validCalculator;
        vector<Calculator*> treeCalculators;
        for (int i=0; i<params->Estimators.size(); i++) {
            if (validCalculator.isValidCalculator("treegroup", params->Estimators[i]) ) {
                if (params->Estimators[i] == "sharedsobs") {
                    treeCalculators.push_back(new SharedSobsCS());
                }else if (params->Estimators[i] == "sharedchao") {
                    treeCalculators.push_back(new SharedChao1());
                }else if (params->Estimators[i] == "sharedace") {
                    treeCalculators.push_back(new SharedAce());
                }else if (params->Estimators[i] == "jabund") {
                    treeCalculators.push_back(new JAbund());
                }else if (params->Estimators[i] == "sorabund") {
                    treeCalculators.push_back(new SorAbund());
                }else if (params->Estimators[i] == "jclass") {
                    treeCalculators.push_back(new Jclass());
                }else if (params->Estimators[i] == "sorclass") {
                    treeCalculators.push_back(new SorClass());
                }else if (params->Estimators[i] == "jest") {
                    treeCalculators.push_back(new Jest());
                }else if (params->Estimators[i] == "sorest") {
                    treeCalculators.push_back(new SorEst());
                }else if (params->Estimators[i] == "thetayc") {
                    treeCalculators.push_back(new ThetaYC());
                }else if (params->Estimators[i] == "thetan") {
                    treeCalculators.push_back(new ThetaN());
                }else if (params->Estimators[i] == "kstest") {
                    treeCalculators.push_back(new KSTest());
                }else if (params->Estimators[i] == "sharednseqs") {
                    treeCalculators.push_back(new SharedNSeqs());
                }else if (params->Estimators[i] == "ochiai") {
                    treeCalculators.push_back(new Ochiai());
                }else if (params->Estimators[i] == "anderberg") {
                    treeCalculators.push_back(new Anderberg());
                }else if (params->Estimators[i] == "kulczynski") {
                    treeCalculators.push_back(new Kulczynski());
                }else if (params->Estimators[i] == "kulczynskicody") {
                    treeCalculators.push_back(new KulczynskiCody());
                }else if (params->Estimators[i] == "lennon") {
                    treeCalculators.push_back(new Lennon());
                }else if (params->Estimators[i] == "morisitahorn") {
                    treeCalculators.push_back(new MorHorn());
                }else if (params->Estimators[i] == "braycurtis") {
                    treeCalculators.push_back(new BrayCurtis());
                }else if (params->Estimators[i] == "whittaker") {
                    treeCalculators.push_back(new Whittaker());
                }else if (params->Estimators[i] == "odum") {
                    treeCalculators.push_back(new Odum());
                }else if (params->Estimators[i] == "canberra") {
                    treeCalculators.push_back(new Canberra());
                }else if (params->Estimators[i] == "structeuclidean") {
                    treeCalculators.push_back(new StructEuclidean());
                }else if (params->Estimators[i] == "structchord") {
                    treeCalculators.push_back(new StructChord());
                }else if (params->Estimators[i] == "hellinger") {
                    treeCalculators.push_back(new Hellinger());
                }else if (params->Estimators[i] == "manhattan") {
                    treeCalculators.push_back(new Manhattan());
                }else if (params->Estimators[i] == "structpearson") {
                    treeCalculators.push_back(new StructPearson());
                }else if (params->Estimators[i] == "soergel") {
                    treeCalculators.push_back(new Soergel());
                }else if (params->Estimators[i] == "spearman") {
                    treeCalculators.push_back(new Spearman());
                }else if (params->Estimators[i] == "structkulczynski") {
                    treeCalculators.push_back(new StructKulczynski());
                }else if (params->Estimators[i] == "speciesprofile") {
                    treeCalculators.push_back(new SpeciesProfile());
                }else if (params->Estimators[i] == "hamming") {
                    treeCalculators.push_back(new Hamming());
                }else if (params->Estimators[i] == "structchi2") {
                    treeCalculators.push_back(new StructChi2());
                }else if (params->Estimators[i] == "gower") {
                    treeCalculators.push_back(new Gower());
                }else if (params->Estimators[i] == "memchi2") {
                    treeCalculators.push_back(new MemChi2());
                }else if (params->Estimators[i] == "memchord") {
                    treeCalculators.push_back(new MemChord());
                }else if (params->Estimators[i] == "memeuclidean") {
                    treeCalculators.push_back(new MemEuclidean());
                }else if (params->Estimators[i] == "mempearson") {
                    treeCalculators.push_back(new MemPearson());
                }else if (params->Estimators[i] == "jsd") {
                    treeCalculators.push_back(new JSD());
                }else if (params->Estimators[i] == "rjsd") {
                    treeCalculators.push_back(new RJSD());
                }
            }
        }
        
        //if the users entered no valid calculators don't execute command
        if (treeCalculators.size() == 0) { params->m->mothurOut("You have given no valid calculators.\n");  return 0; }
        
        params->Estimators.clear();
        for (int i=0; i<treeCalculators.size(); i++) { params->Estimators.push_back(treeCalculators[i]->getName()); }
        
        vector< vector<seqDist>  > calcDists; calcDists.resize(treeCalculators.size());
        SubSample sample; 
        for (int thisIter = 0; thisIter < params->numIters; thisIter++) {
            
            SharedRAbundVectors* thisItersLookup = new SharedRAbundVectors(*params->thisLookup);
            vector<string> namesOfGroups = thisItersLookup->getNamesGroups();
            
            if ((params->subsample && (!params->mainThread)) || (params->mainThread && (thisIter != 0) ) ) {
                if (params->withReplacement)    { sample.getSampleWithReplacement(thisItersLookup, params->subsampleSize);  }
                else                            { sample.getSample(thisItersLookup, params->subsampleSize);                 }
            }
            
            vector<SharedRAbundVector*> thisItersRabunds = thisItersLookup->getSharedRAbundVectors();
            vector<string> thisItersGroupNames = params->thisLookup->getNamesGroups();
            
            driverTreeShared(thisItersRabunds, calcDists, treeCalculators, params->m);
            
            for (int i = 0; i < thisItersRabunds.size(); i++) { delete thisItersRabunds[i]; }
            
            if ((params->subsample && (!params->mainThread)) || (params->mainThread && (thisIter != 0) ) ){
                if((thisIter+1) % 100 == 0){	params->m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); 		}
                params->calcDistsTotals.push_back(calcDists);
                for (int i = 0; i < calcDists.size(); i++) {
                    for (int j = 0; j < calcDists[i].size(); j++) {
                        if (params->m->getDebug()) {  params->m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisItersGroupNames[calcDists[i][j].seq1] + " - " + thisItersGroupNames[calcDists[i][j].seq2] + " distance = " + toString(calcDists[i][j].dist) + ".\n");  }
                    }
                }
            }else { //print results for whole dataset
                for (int i = 0; i < calcDists.size(); i++) {
                    if (params->m->getControl_pressed()) { break; }
                    
                    //initialize matrix
                    vector< vector<double> > matrix; //square matrix to represent the distance
                    matrix.resize(thisItersLookup->size());
                    for (int k = 0; k < thisItersLookup->size(); k++) {  matrix[k].resize(thisItersLookup->size(), 0.0); }
                    
                    for (int j = 0; j < calcDists[i].size(); j++) {
                        int row = calcDists[i][j].seq1;
                        int column = calcDists[i][j].seq2;
                        double dist = calcDists[i][j].dist;
                        
                        matrix[row][column] = -(dist-1.0);
                        matrix[column][row] = -(dist-1.0);
                    }
                    params->matrices.push_back(matrix);
                }
            }
            for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
            delete thisItersLookup;
        }
        if((params->numIters) % 100 != 0){	params->m->mothurOutJustToScreen(toString(params->numIters)+"\n"); 		}
        for (int i=0; i<treeCalculators.size(); i++) { delete treeCalculators[i]; }
        
        return 0;
    }
    catch(exception& e) {
        params->m->errorOut(e, "TreeGroupCommand", "process");
        exit(1);
    }
}