Ejemplo n.º 1
0
SequenceCountParser::SequenceCountParser(string countfile, string fastafile) {
	try {
		
		m = MothurOut::getInstance();
		
		//read count file
		CountTable countTable;
		countTable.readTable(countfile, true, false);
		
		//initialize maps
		namesOfGroups = countTable.getNamesOfGroups();
		for (int i = 0; i < namesOfGroups.size(); i++) {
			vector<Sequence> temp;
			map<string, int> tempMap;
			seqs[namesOfGroups[i]] = temp;
			countTablePerGroup[namesOfGroups[i]] = tempMap;
		}
		
		//read fasta file making sure each sequence is in the group file
		ifstream in;
		m->openInputFile(fastafile, in);
		
        int fastaCount = 0;
		while (!in.eof()) {
			
			if (m->control_pressed) { break; }
			
			Sequence seq(in); m->gobble(in);
            fastaCount++;
            if (m->debug) { if((fastaCount) % 1000 == 0){	m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n.");	} }
			
            if (seq.getName() != "") {
				
                allSeqsMap[seq.getName()] = seq.getName();
                vector<int> groupCounts = countTable.getGroupCounts(seq.getName());
                
                for (int i = 0; i < namesOfGroups.size(); i++) {
                    if (groupCounts[i] != 0) {
                        seqs[namesOfGroups[i]].push_back(seq);	
                        countTablePerGroup[namesOfGroups[i]][seq.getName()] = groupCounts[i];
                    }
                }
			}
		}
		in.close();					
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
		exit(1);
	}
}
Ejemplo n.º 2
0
SequenceCountParser::SequenceCountParser(string fastafile, CountTable& countTable) {
	try {
		
		m = MothurOut::getInstance();
				
		//initialize maps
        if (countTable.hasGroupInfo()) {
            namesOfGroups = countTable.getNamesOfGroups();
            for (int i = 0; i < namesOfGroups.size(); i++) {
                vector<Sequence> temp;
                map<string, int> tempMap;
                seqs[namesOfGroups[i]] = temp;
                countTablePerGroup[namesOfGroups[i]] = tempMap;
            }
            
            //read fasta file making sure each sequence is in the group file
            ifstream in;
            m->openInputFile(fastafile, in);
            
            int fastaCount = 0;
            while (!in.eof()) {
                
                if (m->control_pressed) { break; }
                
                Sequence seq(in); m->gobble(in);
                fastaCount++;
                if (m->debug) { if((fastaCount) % 1000 == 0){	m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n.");	} }
                
                if (seq.getName() != "") {
                    
                    allSeqsMap[seq.getName()] = seq.getName();
                    vector<int> groupCounts = countTable.getGroupCounts(seq.getName());
                    
                    for (int i = 0; i < namesOfGroups.size(); i++) {
                        if (groupCounts[i] != 0) {
                            seqs[namesOfGroups[i]].push_back(seq);	
                            countTablePerGroup[namesOfGroups[i]][seq.getName()] = groupCounts[i];
                        }
                    }
                }
            }
            in.close();	
        }else {  m->control_pressed = true;  m->mothurOut("[ERROR]: cannot parse fasta file by group with a count table that does not include group data, please correct.\n"); }
        
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
		exit(1);
	}
}
Ejemplo n.º 3
0
//**********************************************************************************************************************
int SplitGroupCommand::runCount(){
	try {
        
        CountTable ct;
        ct.readTable(countfile, true, false);
        if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
        
        if (m->control_pressed) { return 0; }
        
        vector<string> namesGroups = ct.getNamesOfGroups();
        SharedUtil util;  util.setGroups(Groups, namesGroups); 
        
        //fill filehandles with neccessary ofstreams
        map<string, string> ffiles; //group -> filename
        map<string, string> cfiles; //group -> filename
        for (int i=0; i<Groups.size(); i++) {
            ofstream ftemp, ctemp;
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
            variables["[group]"] = Groups[i];
            string newFasta = getOutputFileName("fasta",variables);
            outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
            ffiles[Groups[i]] = newFasta;
            m->openOutputFile(newFasta, ftemp); ftemp.close();
            
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
            string newCount = getOutputFileName("count",variables);
            outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
            cfiles[Groups[i]] = newCount;
            m->openOutputFile(newCount, ctemp);
            ctemp << "Representative_Sequence\ttotal\t" << Groups[i] << endl; ctemp.close();
        }
        
        ifstream in; 
        m->openInputFile(fastafile, in);
        
        while (!in.eof()) {
            Sequence seq(in); m->gobble(in);
            
            if (m->control_pressed) { break; }
            if (seq.getName() != "") {
                vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
                for (int i = 0; i < thisSeqsGroups.size(); i++) {
                    if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
                        ofstream outf, outc;
                        m->openOutputFileAppend(ffiles[thisSeqsGroups[i]], outf);
                        seq.printSequence(outf); outf.close();
                        int numSeqs = ct.getGroupCount(seq.getName(), thisSeqsGroups[i]);
                        m->openOutputFileAppend(cfiles[thisSeqsGroups[i]], outc);
                        outc << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl; outc.close();
                    }
                }
            }
        }
        in.close();
        
        return 0;

    }
	catch(exception& e) {
		m->errorOut(e, "SplitGroupCommand", "runCount");
		exit(1);
	}
}
Ejemplo n.º 4
0
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) { m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n"); m->setControl_pressed(true); return 0; }
        
        //read countTable
        countTable.readTable(countfile, true, false);
        
        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }
        
        
        vector<string> dnamesGroups = designMap->getNamesGroups();
        
        //sanity check
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (int i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
            
        }
        if (error) { m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n"); m->setControl_pressed(true); return 0; }
        
        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (int i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }
        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();
        
        //create new table
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap;
        for (int i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        treatments = newTable.getNamesOfGroups();
        
        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (int i = 0; i < namesOfSeqs.size(); i++) {
            
            if (m->getControl_pressed()) { break; }
            
            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;
            
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }
        
            //create new counts for seq for new table
            vector<int> newCounts; int totalAbund = 0;
            for (int j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund);  //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }
            
            //add seq to new table
            if(totalAbund == 0) {
                namesToRemove.insert(namesOfSeqs[i]);
            }else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }
        
        if (error) { m->setControl_pressed(true); return 0; }
        
        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);
            
            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) {  out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;
            
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);
            
            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();
            
            map<string, vector<string> > filenames = removeCommand->getOutputFiles();
            
            delete removeCommand;
            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            
            util.mothurRemove(accnosFile);
        }
    
        string thisOutputDir = outputDir;
        if (outputDir == "") {  thisOutputDir += util.hasPath(countfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
        
        newTable.printTable(outputFileName);
        
        return 0;
        
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
Ejemplo n.º 5
0
int RemoveGroupsCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//get groups you want to remove
		if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups);  }
		
		if (groupfile != "") {
			groupMap = new GroupMap(groupfile);
			groupMap->readMap();
			
			//make sure groups are valid
			//takes care of user setting groupNames that are invalid or setting groups=all
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			vector<string> checkedGroups;
            for (int i = 0; i < Groups.size(); i++) {
                if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
                else {  m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
            }
            
            if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
			else {
                Groups = checkedGroups;
                m->setGroups(Groups);
            }
            
			//fill names with names of sequences that are from the groups we want to remove 
			fillNames();
			
			delete groupMap;
		}else if (countfile != ""){
            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
            }
            CountTable ct;
            ct.readTable(countfile, true, false);
            if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
            
            vector<string> gNamesOfGroups = ct.getNamesOfGroups();
            SharedUtil util;
            util.setGroups(Groups, gNamesOfGroups);
            vector<string> namesOfSeqs = ct.getNamesOfSeqs();
            sort(Groups.begin(), Groups.end());
            
            for (int i = 0; i < namesOfSeqs.size(); i++) {
                vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
                if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
                    names.insert(namesOfSeqs[i]);
                }
            }
        }

				
		if (m->control_pressed) { return 0; }
		
		//read through the correct file and output lines you want to keep
		if (namefile != "")			{		readName();		}
		if (fastafile != "")		{		readFasta();	}
		if (groupfile != "")		{		readGroup();	}
        if (countfile != "")		{		readCount();	}
		if (listfile != "")			{		readList();		}
		if (taxfile != "")			{		readTax();		}
		if (sharedfile != "")		{		readShared();	}
        if (designfile != "")		{		readDesign();	}
		
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }
				
		if (outputNames.size() != 0) {
			m->mothurOutEndLine();
			m->mothurOut("Output File names: "); m->mothurOutEndLine();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
			m->mothurOutEndLine();
			
			//set fasta file as new current fastafile
			string current = "";
			itTypes = outputTypes.find("fasta");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
			}
			
			itTypes = outputTypes.find("name");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
			}
			
			itTypes = outputTypes.find("group");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
			}
			
			itTypes = outputTypes.find("list");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
			}
			
			itTypes = outputTypes.find("taxonomy");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
			}
			
			itTypes = outputTypes.find("shared");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
			}
            
            itTypes = outputTypes.find("design");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
			}
            
            itTypes = outputTypes.find("count");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
			}
		}
		
		return 0;		
	}
	
	catch(exception& e) {
		m->errorOut(e, "RemoveGroupsCommand", "execute");
		exit(1);
	}
}
Ejemplo n.º 6
0
EstOutput Parsimony::getValues(Tree* t, int p, string o) {
	try {
		processors = p;
		outputDir = o;
        CountTable* ct = t->getCountTable();
		
		//if the users enters no groups then give them the score of all groups
		vector<string> mGroups = m->getGroups();
		int numGroups = mGroups.size();
		
		//calculate number of comparsions
		int numComp = 0;
		vector< vector<string> > namesOfGroupCombos;
		for (int r=0; r<numGroups; r++) { 
			for (int l = 0; l < r; l++) {
				numComp++;
				vector<string> groups; groups.push_back(mGroups[r]); groups.push_back(mGroups[l]);
				//cout << globaldata->Groups[r] << '\t' << globaldata->Groups[l] << endl;
				namesOfGroupCombos.push_back(groups);
			}
		}

		//numComp+1 for AB, AC, BC, ABC
		if (numComp != 1) {
			vector<string> groups;
			if (numGroups == 0) {
				//get score for all users groups
				vector<string> tGroups = ct->getNamesOfGroups();
				for (int i = 0; i < tGroups.size(); i++) {
					if (tGroups[i] != "xxx") {
						groups.push_back(tGroups[i]);
						//cout << tmap->namesOfGroups[i] << endl;
					}
				}
				namesOfGroupCombos.push_back(groups);
			}else {
				for (int i = 0; i < mGroups.size(); i++) {
					groups.push_back(mGroups[i]);
					//cout << globaldata->Groups[i] << endl;
				}
				namesOfGroupCombos.push_back(groups);
			}
		}
        
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }
        
        data = createProcesses(t, namesOfGroupCombos, ct);
		
		return data;
		
	}
	catch(exception& e) {
		m->errorOut(e, "Parsimony", "getValues");
		exit(1);
	}
}
Ejemplo n.º 7
0
//**********************************************************************************************************************
int SharedCommand::createSharedFromListGroup() {
	try {

        GroupMap* groupMap = NULL;
        CountTable* countTable = NULL;
        pickedGroups = false;
        if (groupfile != "") {
            groupMap = new GroupMap(groupfile);

            int groupError = groupMap->readMap();
            if (groupError == 1) { delete groupMap; return 0; }
            vector<string> allGroups = groupMap->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }else{
            countTable = new CountTable();
            countTable->readTable(countfile, true, false);
            vector<string> allGroups = countTable->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }
        int numGroups = Groups.size();
        if (m->getControl_pressed()) { return 0; }

        ofstream out;
        string filename = "";
        if (!pickedGroups) {
            string filename = listfile;
            if (outputDir == "") { outputDir += util.hasPath(filename); }

            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
            filename = getOutputFileName("shared",variables);
            outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
            util.openOutputFile(filename, out);
        }

        //set fileroot
        fileroot = outputDir + util.getRootName(util.getSimpleName(listfile));
        map<string, string> variables;
		variables["[filename]"] = fileroot;
        string errorOff = "no error";

        InputData input(listfile, "shared", Groups);
        SharedListVector* SharedList = input.getSharedListVector();
        string lastLabel = SharedList->getLabel();
        SharedRAbundVectors* lookup;

        if (m->getControl_pressed()) {
            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
            out.close(); if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }

        //sanity check
        vector<string> namesSeqs;
        int numGroupNames = 0;
        if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
        else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
        int error = ListGroupSameSeqs(namesSeqs, SharedList);

        if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {  //if the user has not specified any groups and their files don't match exit with error
            m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n");  m->setControl_pressed(true);

            out.close(); if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made

            //delete memory
            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
            return 0;
        }

        if (error == 1) { m->setControl_pressed(true); }

        //if user has specified groups make new groupfile for them
        if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file
            string groups = "";
            if (numGroups < 4) {
                for (int i = 0; i < numGroups-1; i++) {
                    groups += Groups[i] + ".";
                }
                groups+=Groups[numGroups-1];
            }else { groups = "merge"; }
            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(listfile));
            variables["[group]"] = groups;
            string newGroupFile = getOutputFileName("group",variables);
            outputTypes["group"].push_back(newGroupFile);
            outputNames.push_back(newGroupFile);
            ofstream outGroups;
            util.openOutputFile(newGroupFile, outGroups);

            vector<string> names = groupMap->getNamesSeqs();
            string groupName;
            for (int i = 0; i < names.size(); i++) {
                groupName = groupMap->getGroup(names[i]);
                if (isValidGroup(groupName, Groups)) {
                    outGroups << names[i] << '\t' << groupName << endl;
                }
            }
            outGroups.close();
        }

        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        bool printHeaders = true;
    
        while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            if (m->getControl_pressed()) {
                delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){

                lookup = SharedList->getSharedRAbundVector();

                m->mothurOut(lookup->getLabel()+"\n"); 

                if (m->getControl_pressed()) {
                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }

                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);

                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else {
                    printSharedData(lookup, out, printHeaders); //prints info to the .shared file
                }
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());
            }

            if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = SharedList->getLabel();

                delete SharedList;
                SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

                lookup = SharedList->getSharedRAbundVector();
                m->mothurOut(lookup->getLabel()+"\n"); 

                if (m->getControl_pressed()) {
                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }

                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);

                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else {
                    printSharedData(lookup, out, printHeaders); //prints info to the .shared file
                }
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());

                //restore real lastlabel to save below
                SharedList->setLabel(saveLabel);
            }


            lastLabel = SharedList->getLabel();

            delete SharedList;
            SharedList = input.getSharedListVector(); //get new list vector to process
        }
        
        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            if (processedLabels.count(lastLabel) != 1) {
                needToRun = true;
            }
        }
        
        //run last label if you need to
        if (needToRun )  {
            if (SharedList != NULL) {	delete SharedList;	}
            SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

            lookup = SharedList->getSharedRAbundVector();
            m->mothurOut(lookup->getLabel()+"\n"); 

            if (m->getControl_pressed()) {
                if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            //if picked groups must split the shared file by label
            if (pickedGroups) {
                string filename = listfile;
                if (outputDir == "") { outputDir += util.hasPath(filename); }

                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                variables["[distance]"] = lookup->getLabel();
                filename = getOutputFileName("shared",variables);
                outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                ofstream out2;
                util.openOutputFile(filename, out2);

                lookup->eliminateZeroOTUS();
                printSharedData(lookup, out2, printHeaders);
                out2.close();
            }else {
                printSharedData(lookup, out, printHeaders); //prints info to the .shared file
            }
            delete lookup;
            delete SharedList;
        }
        
        if (!pickedGroups) { out.close(); }

        if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }

        if (m->getControl_pressed()) {
            if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }
        
        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
		exit(1);
	}
}
Ejemplo n.º 8
0
int TreeGroupCommand::execute(){
	try {
	
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
		if (format == "sharedfile") {
			InputData input(sharedfile, "sharedfile", Groups);
			SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
			lastLabel = lookup->getLabel();
            Groups = lookup->getNamesGroups();
			
            if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command.\n");   return 0; }
			
			//create treemap class from groupmap for tree class to use
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < Groups.size(); i++) {
                nameMap.insert(Groups[i]);
                gps.insert(Groups[i]);
                groupMap[Groups[i]] = Groups[i];
            }
            ct.createTable(nameMap, groupMap, gps);
			
			//fills tree names with shared files groups
			Treenames = lookup->getNamesGroups();
            
			if (m->getControl_pressed()) { return 0; }
			
			//create tree file
			makeSimsShared(input, lookup, ct);
			
			if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);  }  return 0; }
		}else{
			//read in dist file
			filename = inputfile;
            
            ReadMatrix* readMatrix;
			if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }	
			else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
				
			readMatrix->setCutoff(cutoff);
	
            ListVector* list;
            if(namefile != ""){	
                NameAssignment* nameMap = new NameAssignment(namefile);
                nameMap->readMap();
                readMatrix->read(nameMap);
                list = readMatrix->getListVector();
                delete nameMap;
            }else if (countfile != "") {
                CountTable* ct = new CountTable();
                ct->readTable(countfile, true, false);
                readMatrix->read(ct);
                list = readMatrix->getListVector();
                delete ct;
            }else { NameAssignment* nameMap = NULL; readMatrix->read(nameMap); list = readMatrix->getListVector(); }

			SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
			Treenames.clear();
            
			//make treemap
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < list->getNumBins(); i++) {
                string bin = list->get(i);
                nameMap.insert(bin); 
                gps.insert(bin); 
                groupMap[bin] = bin;
                Treenames.push_back(bin);
            }
            ct.createTable(nameMap, groupMap, gps);
			vector<string> namesGroups = ct.getNamesOfGroups();
			
			if (m->getControl_pressed()) { return 0; }
			
			vector< vector<double> > matrix = makeSimsDist(dMatrix, list->getNumBins());
            delete readMatrix;
            delete dMatrix;
			
			if (m->getControl_pressed()) { return 0; }

			//create a new filename
            map<string, string> variables; 
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
			string outputFile = getOutputFileName("tree",variables);	
			outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
				
            Tree* newTree = new Tree(&ct, matrix, Treenames);
            if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
            else { newTree->assembleTree(); }
 
            if (newTree != NULL) {  newTree->createNewickFile(outputFile);  delete newTree; }
			
			if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n");
		}
				
		//set tree file as new current treefile
		string currentName = "";
		itTypes = outputTypes.find("tree");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); }
		}
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "execute");
		exit(1);
	}
}
Ejemplo n.º 9
0
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
	try {
				
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else { thisLabel = *labels.begin(); }
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want
		if (thisLabel != "") { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				list = input.getListVector(lastLabel); 
			}
		}
        
        string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[extension]"] = m->getExtension(listfile);
        variables["[tag]"] = list->getLabel();
		string outputFileName = getOutputFileName("list", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
		string outputGroupFileName = getOutputFileName("group", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
        variables["[extension]"] = m->getExtension(countfile);
        string outputCountFileName = getOutputFileName("count", variables);
        
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;

		
		//if groupfile is given then use it
		GroupMap* groupMap;
        CountTable ct;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}else if (countfile != "") {
            ct.readTable(countfile, true, false);
            if (ct.hasGroupInfo()) {
                vector<string> namesGroups = ct.getNamesOfGroups();
                SharedUtil util;
                util.setGroups(Groups, namesGroups);
            }
        }
		
		
		if (list != NULL) {
            
            vector<string> binLabels = list->getLabels();
            vector<string> newLabels;
            
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
                int binsize = names.size();
				
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames; binsize = names.size();
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}else if (countfile != "") {
					saveBinNames = "";
                    binsize = 0;
					for(int k = 0; k < names.size(); k++) {
                        if (ct.hasGroupInfo()) {
                            vector<string> thisSeqsGroups = ct.getGroups(names[k]);
                            
                            int thisSeqsCount = 0;
                            for (int n = 0; n < thisSeqsGroups.size(); n++) {
                                if (m->inUsersGroups(thisSeqsGroups[n], Groups)) {
                                    thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]);
                                }
                            }
                            binsize += thisSeqsCount;
                            //if you don't have any seqs from the groups the user wants, then remove you.
                            if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); }
                            else { saveBinNames += names[k] + ","; }
                        }else {
                            binsize += ct.getNumSeqs(names[k]); 
                            saveBinNames += names[k] + ",";
                        }
					}
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
                }

				if (binsize > nseqs) { //keep bin
					newList.push_back(saveBinNames);
                    newLabels.push_back(binLabels[i]);
					if (groupfile != "") {  for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }  }
                    else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) {  ct.remove(newGroupFile[k]); } }  
				}else {  if (countfile != "") {  for(int k = 0; k < names.size(); k++) {  ct.remove(names[k]); } }  }
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.setLabels(newLabels);
                newList.printHeaders(out);
                newList.print(out);
			}
		}	
		
		out.close();
		if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
        if (countfile != "") { 
            if (ct.hasGroupInfo()) {
                vector<string> allGroups = ct.getNamesOfGroups();
                for (int i = 0; i < allGroups.size(); i++) {
                    if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); }
                }

            }
            ct.printTable(outputCountFileName);
            outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); 
        }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}
Ejemplo n.º 10
0
EstOutput Unweighted::getValues(Tree* t, int p, string o) {
	try {
		processors = p;
		outputDir = o;
        
        CountTable* ct = t->getCountTable();
        
		//if the users enters no groups then give them the score of all groups
		int numGroups = m->getNumGroups();
		
		//calculate number of comparsions
		int numComp = 0;
		vector< vector<string> > namesOfGroupCombos;
		for (int r=0; r<numGroups; r++) { 
			for (int l = 0; l < r; l++) {
				numComp++;
				vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
				namesOfGroupCombos.push_back(groups);
			}
		}
		
		if (numComp != 1) {
			vector<string> groups;
			if (numGroups == 0) {
				//get score for all users groups
				for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
					if ((ct->getNamesOfGroups())[i] != "xxx") {
						groups.push_back((ct->getNamesOfGroups())[i]);
					}
				}
				namesOfGroupCombos.push_back(groups);
			}else {
				for (int i = 0; i < m->getNumGroups(); i++) {
					groups.push_back((m->getGroups())[i]);
				}
				namesOfGroupCombos.push_back(groups);
			}
		}
        
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }
        
        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();
        
		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "Unweighted", "getValues");
		exit(1);
	}
}