예제 #1
0
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
		string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" +  m->getExtension(groupfile);
		
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;
		
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else { thisLabel = *labels.begin(); }
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want
		if (thisLabel != "") { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				list = input.getListVector(lastLabel); 
			}
		}
		
		//if groupfile is given then use it
		GroupMap* groupMap;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}
		
		
		if (list != NULL) {	
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
				
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames;
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}

				if (names.size() > nseqs) { //keep bin
					newList.push_back(saveBinNames);
					for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }
				}
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.print(out);
			}
		}	
		
		out.close();
		if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}
예제 #2
0
int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
    try {
        
        if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
                                           
        GroupMap* groupMap;
        int numGroups = 0;
        groups.clear();
        totalGroups.clear();
        indexGroupMap.clear();
        indexNameMap.clear();
        counts.clear();
        map<int, string> originalGroupIndexes;
        
        if (groupfile != "") { 
            hasGroups = true;
            groupMap = new GroupMap(groupfile); groupMap->readMap();
            numGroups = groupMap->getNumGroups();
            groups = groupMap->getNamesOfGroups();
            totalGroups.resize(numGroups, 0);
        }else if(createGroup) {
            hasGroups = true;
            numGroups = 1;
            groups.push_back("Group1");
            totalGroups.resize(numGroups, 0);
        }
		//sort groups to keep consistent with how we store the groups in groupmap
        sort(groups.begin(), groups.end());
        for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
        m->setAllGroups(groups);
        
        bool error = false;
        string name;
        uniques = 0;
        total = 0;
        
        
        //open input file
        ifstream in;
        m->openInputFile(namefile, in);
        
        int total = 0;
        while (!in.eof()) {
            if (m->control_pressed) { break; }
            
            string firstCol, secondCol;
            in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
            
            m->checkName(firstCol);
            m->checkName(secondCol);
            
            vector<string> names;
            m->splitAtChar(secondCol, names, ',');
            
            map<string, int> groupCounts;
            int thisTotal = 0;
            if (groupfile != "") {
                //set to 0
                for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
                
                //get counts for each of the users groups
                for (int i = 0; i < names.size(); i++) {
                    string group = groupMap->getGroup(names[i]);
                    
                    if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
                    else {
                        map<string, int>::iterator it = groupCounts.find(group);
                        
                        //if not found, then this sequence is not from a group we care about
                        if (it != groupCounts.end()) {
                            it->second++;
                            thisTotal++;
                        }
                    }
                }
            }else if (createGroup) {
                groupCounts["Group1"]=0;
                for (int i = 0; i < names.size(); i++) {
                    string group = "Group1";
                    groupCounts["Group1"]++; thisTotal++;
                }
            }else { thisTotal = names.size();  }
            
            //if group info, then read it
            vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
            for (int i = 0; i < numGroups; i++) {  
                thisGroupsCount[i] = groupCounts[groups[i]]; 
                totalGroups[i] += thisGroupsCount[i]; 
            }
            
            map<string, int>::iterator it = indexNameMap.find(firstCol);
            if (it == indexNameMap.end()) {
                if (hasGroups) {  counts.push_back(thisGroupsCount);  }
                indexNameMap[firstCol] = uniques;
                totals.push_back(thisTotal);
                total += thisTotal;
                uniques++;
            }else {
                error = true;
                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); 
            }
        }
        in.close();
		
        if (error) { m->control_pressed = true; }
        else { //check for zero groups
            if (hasGroups) {
                for (int i = 0; i < totalGroups.size(); i++) {
                    if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
                }
            }
        }
        if (groupfile != "") { delete groupMap; }
        
        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "CountTable", "createTable");
		exit(1);
	}
}
예제 #3
0
int ClassifySeqsCommand::execute(){
	try {
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
        
        string outputMethodTag = method;
		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());	}
		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, util.getRandomNumber(), current->getVersion());				}
        else if(method == "zap"){	
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
        }
		else {
			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
			m->mothurOutEndLine();
			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());
		}
		
		if (m->getControl_pressed()) { delete classify; return 0; }
				
        m->mothurOut("Classifying sequences from " + fastafile + " ...\n" );
        
        string baseTName = util.getSimpleName(taxonomyFileName);
        
        //set rippedTaxName to
        string RippedTaxName = "";
        bool foundDot = false;
        for (int i = baseTName.length()-1; i >= 0; i--) {
            if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
            else if (foundDot && (baseTName[i] == '.')) {  break; }
            else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
        }
        
        if (outputDir == "") { outputDir += util.hasPath(fastafile); }
        map<string, string> variables;
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
        variables["[tag]"] = RippedTaxName;
        variables["[tag2]"] = outputMethodTag;
        string newTaxonomyFile = getOutputFileName("taxonomy", variables);
        string newaccnosFile = getOutputFileName("accnos", variables);
        string tempTaxonomyFile = outputDir + util.getRootName(util.getSimpleName(fastafile)) + "taxonomy.temp";
        string taxSummary = getOutputFileName("taxsummary", variables);
        
        if ((method == "knn") && (search == "distance")) {
            string DistName = getOutputFileName("matchdist", variables);
            classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
        }
        
        outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
        outputNames.push_back(taxSummary);	outputTypes["taxsummary"].push_back(taxSummary);
        
        long start = time(NULL);
        int numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastafile);
        
        if (!util.isBlank(newaccnosFile)) { m->mothurOut("\n[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences.\n");
            outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
        }else { util.mothurRemove(newaccnosFile); }
        
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.\n\n");
        start = time(NULL);
        
        //read namefile
        map<string, vector<string> > nameMap;
        map<string,  vector<string> >::iterator itNames;
        if(namefile != "") {
            m->mothurOut("Reading " + namefile + "..."); cout.flush();
            nameMap.clear(); //remove old names
            util.readNames(namefile, nameMap);
            m->mothurOut("  Done.\n");
        }
        
        //output taxonomy with the unclassified bins added
        ifstream inTax;
        util.openInputFile(newTaxonomyFile, inTax);
        
        ofstream outTax;
        string unclass = newTaxonomyFile + ".unclass.temp";
        util.openOutputFile(unclass, outTax);
        
        //get maxLevel from phylotree so you know how many 'unclassified's to add
        int maxLevel = classify->getMaxLevel();
        
        //read taxfile - this reading and rewriting is done to preserve the confidence scores.
        string name, taxon;
        GroupMap* groupMap = NULL;
        CountTable* ct = NULL;
        PhyloSummary* taxaSum;
        
        if (hasCount) {
            ct = new CountTable();
            ct->readTable(countfile, true, false);
            taxaSum = new PhyloSummary(ct, relabund, printlevel);
        }else {
            if (groupfile != "") {  groupMap = new GroupMap(groupfile); groupMap->readMap(); }
            taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
        }
        
        while (!inTax.eof()) {
            if (m->getControl_pressed()) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);	} delete classify; return 0; }
            
            inTax >> name; util.gobble(inTax);
            taxon = util.getline(inTax); util.gobble(inTax);
            
            string newTax = util.addUnclassifieds(taxon, maxLevel, probs);
            
            outTax << name << '\t' << newTax << endl;
            
            if (namefile != "") {
                itNames = nameMap.find(name);
                
                if (itNames == nameMap.end()) {
                    m->mothurOut(name + " is not in your name file please correct.\n");  exit(1);
                }else{
                    //add it as many times as there are identical seqs
                    for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); }
                    itNames->second.clear();
                    nameMap.erase(itNames->first);
                }
            }else { taxaSum->addSeqToTree(name, newTax); }
        }
        inTax.close();
        outTax.close();
        
        util.mothurRemove(newTaxonomyFile);
        util.renameFile(unclass, newTaxonomyFile);
        
        if (m->getControl_pressed()) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);	} delete classify; return 0; }
        
        //print summary file
        ofstream outTaxTree;
        util.openOutputFile(taxSummary, outTaxTree);
        taxaSum->print(outTaxTree, output);
        outTaxTree.close();
        
        if (ct != NULL) { delete ct; }
        if (groupMap != NULL) { delete groupMap; } delete taxaSum;
        util.mothurRemove(tempTaxonomyFile);
        delete classify;
        
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences.\n\n");

        m->mothurOut("\nOutput File Names: \n");
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
        m->mothurOutEndLine();
		
		//set taxonomy file as new current taxonomyfile
		string currentName = "";
		itTypes = outputTypes.find("taxonomy");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } }
		
		currentName = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifySeqsCommand", "execute");
		exit(1);
	}
}
예제 #4
0
//**********************************************************************************************************************
int SharedCommand::createSharedFromListGroup() {
	try {

        GroupMap* groupMap = NULL;
        CountTable* countTable = NULL;
        pickedGroups = false;
        if (groupfile != "") {
            groupMap = new GroupMap(groupfile);

            int groupError = groupMap->readMap();
            if (groupError == 1) { delete groupMap; return 0; }
            vector<string> allGroups = groupMap->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }else{
            countTable = new CountTable();
            countTable->readTable(countfile, true, false);
            vector<string> allGroups = countTable->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }
        int numGroups = Groups.size();
        if (m->getControl_pressed()) { return 0; }

        ofstream out;
        string filename = "";
        if (!pickedGroups) {
            string filename = listfile;
            if (outputDir == "") { outputDir += util.hasPath(filename); }

            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
            filename = getOutputFileName("shared",variables);
            outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
            util.openOutputFile(filename, out);
        }

        //set fileroot
        fileroot = outputDir + util.getRootName(util.getSimpleName(listfile));
        map<string, string> variables;
		variables["[filename]"] = fileroot;
        string errorOff = "no error";

        InputData input(listfile, "shared", Groups);
        SharedListVector* SharedList = input.getSharedListVector();
        string lastLabel = SharedList->getLabel();
        SharedRAbundVectors* lookup;

        if (m->getControl_pressed()) {
            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
            out.close(); if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }

        //sanity check
        vector<string> namesSeqs;
        int numGroupNames = 0;
        if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
        else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
        int error = ListGroupSameSeqs(namesSeqs, SharedList);

        if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {  //if the user has not specified any groups and their files don't match exit with error
            m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n");  m->setControl_pressed(true);

            out.close(); if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made

            //delete memory
            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
            return 0;
        }

        if (error == 1) { m->setControl_pressed(true); }

        //if user has specified groups make new groupfile for them
        if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file
            string groups = "";
            if (numGroups < 4) {
                for (int i = 0; i < numGroups-1; i++) {
                    groups += Groups[i] + ".";
                }
                groups+=Groups[numGroups-1];
            }else { groups = "merge"; }
            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(listfile));
            variables["[group]"] = groups;
            string newGroupFile = getOutputFileName("group",variables);
            outputTypes["group"].push_back(newGroupFile);
            outputNames.push_back(newGroupFile);
            ofstream outGroups;
            util.openOutputFile(newGroupFile, outGroups);

            vector<string> names = groupMap->getNamesSeqs();
            string groupName;
            for (int i = 0; i < names.size(); i++) {
                groupName = groupMap->getGroup(names[i]);
                if (isValidGroup(groupName, Groups)) {
                    outGroups << names[i] << '\t' << groupName << endl;
                }
            }
            outGroups.close();
        }

        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        bool printHeaders = true;
    
        while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            if (m->getControl_pressed()) {
                delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){

                lookup = SharedList->getSharedRAbundVector();

                m->mothurOut(lookup->getLabel()+"\n"); 

                if (m->getControl_pressed()) {
                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }

                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);

                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else {
                    printSharedData(lookup, out, printHeaders); //prints info to the .shared file
                }
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());
            }

            if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = SharedList->getLabel();

                delete SharedList;
                SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

                lookup = SharedList->getSharedRAbundVector();
                m->mothurOut(lookup->getLabel()+"\n"); 

                if (m->getControl_pressed()) {
                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }

                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);

                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else {
                    printSharedData(lookup, out, printHeaders); //prints info to the .shared file
                }
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());

                //restore real lastlabel to save below
                SharedList->setLabel(saveLabel);
            }


            lastLabel = SharedList->getLabel();

            delete SharedList;
            SharedList = input.getSharedListVector(); //get new list vector to process
        }
        
        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            if (processedLabels.count(lastLabel) != 1) {
                needToRun = true;
            }
        }
        
        //run last label if you need to
        if (needToRun )  {
            if (SharedList != NULL) {	delete SharedList;	}
            SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

            lookup = SharedList->getSharedRAbundVector();
            m->mothurOut(lookup->getLabel()+"\n"); 

            if (m->getControl_pressed()) {
                if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            //if picked groups must split the shared file by label
            if (pickedGroups) {
                string filename = listfile;
                if (outputDir == "") { outputDir += util.hasPath(filename); }

                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                variables["[distance]"] = lookup->getLabel();
                filename = getOutputFileName("shared",variables);
                outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                ofstream out2;
                util.openOutputFile(filename, out2);

                lookup->eliminateZeroOTUS();
                printSharedData(lookup, out2, printHeaders);
                out2.close();
            }else {
                printSharedData(lookup, out, printHeaders); //prints info to the .shared file
            }
            delete lookup;
            delete SharedList;
        }
        
        if (!pickedGroups) { out.close(); }

        if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }

        if (m->getControl_pressed()) {
            if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }
        
        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
		exit(1);
	}
}
예제 #5
0
int ClassifySeqsCommand::execute(){
	try {
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
        
        string outputMethodTag = method;
		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	}
		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());				}
        else if(method == "zap"){	
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
        }
		else {
			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
			m->mothurOutEndLine();
			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	
		}
		
		if (m->control_pressed) { delete classify; return 0; }
				
		for (int s = 0; s < fastaFileNames.size(); s++) {
		
			m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
			
			string baseTName = m->getSimpleName(taxonomyFileName);
			
            //set rippedTaxName to 
			string RippedTaxName = "";
            bool foundDot = false;
            for (int i = baseTName.length()-1; i >= 0; i--) {
                if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
                else if (foundDot && (baseTName[i] == '.')) {  break; }
                else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
            }
            //if (RippedTaxName != "") { RippedTaxName +=  "."; }   
          
			if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
            variables["[tag]"] = RippedTaxName;
            variables["[tag2]"] = outputMethodTag;
			string newTaxonomyFile = getOutputFileName("taxonomy", variables);
			string newaccnosFile = getOutputFileName("accnos", variables);
			string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
			string taxSummary = getOutputFileName("taxsummary", variables);
			
			if ((method == "knn") && (search == "distance")) { 
				string DistName = getOutputFileName("matchdist", variables);
				classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
			}
			
			outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
			outputNames.push_back(taxSummary);	outputTypes["taxsummary"].push_back(taxSummary);
			
			int start = time(NULL);
			int numFastaSeqs = 0;
			for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
		
			vector<unsigned long long> positions; 
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			positions = m->divideFile(fastaFileNames[s], processors);
			for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
#else
			if (processors == 1) {
				lines.push_back(new linePair(0, 1000));
			}else {
				positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); 
                if (numFastaSeqs < processors) { processors = numFastaSeqs; }
				
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			}
#endif
			if(processors == 1){
				numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
			}else{
				numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); 
			}
			
			if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
                outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
            }else { m->mothurRemove(newaccnosFile); }

            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
            start = time(NULL);
            
            //read namefile
            if(namefile != "") {
                
                m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
                nameMap.clear(); //remove old names
                m->readNames(namefileNames[s], nameMap);
                m->mothurOut("  Done."); m->mothurOutEndLine();
            }
            
            //output taxonomy with the unclassified bins added
            ifstream inTax;
            m->openInputFile(newTaxonomyFile, inTax);
            
            ofstream outTax;
            string unclass = newTaxonomyFile + ".unclass.temp";
            m->openOutputFile(unclass, outTax);
            
            //get maxLevel from phylotree so you know how many 'unclassified's to add
            int maxLevel = classify->getMaxLevel();
            
            //read taxfile - this reading and rewriting is done to preserve the confidence scores.
            string name, taxon;
            string group = "";
            GroupMap* groupMap = NULL;
            CountTable* ct = NULL;
            PhyloSummary* taxaSum;
            
            if (hasCount) {
                ct = new CountTable();
                ct->readTable(countfileNames[s], true, false);
                taxaSum = new PhyloSummary(ct, relabund, printlevel);
            }else {
                if (groupfile != "") {  group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
                taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
            }
            
            while (!inTax.eof()) {
                if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
                
                inTax >> name >> taxon; m->gobble(inTax);
                
                string newTax = m->addUnclassifieds(taxon, maxLevel, probs);
                
                outTax << name << '\t' << newTax << endl;
                
                if (namefile != "") {
                    itNames = nameMap.find(name);
                    
                    if (itNames == nameMap.end()) {
                        m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
                    }else{
                        for (int i = 0; i < itNames->second.size(); i++) {
                            taxaSum->addSeqToTree(itNames->second[i], newTax);  //add it as many times as there are identical seqs
                        }
                        itNames->second.clear();
                        nameMap.erase(itNames->first);
                    }
                }else {
                    taxaSum->addSeqToTree(name, newTax);
                }
            }
            inTax.close();
            outTax.close();
            
            m->mothurRemove(newTaxonomyFile);
            rename(unclass.c_str(), newTaxonomyFile.c_str());
            
            if (m->control_pressed) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
            
            //print summary file
            ofstream outTaxTree;
            m->openOutputFile(taxSummary, outTaxTree);
            taxaSum->print(outTaxTree, output);
            outTaxTree.close();
            
            if (ct != NULL) { delete ct; }
            if (groupMap != NULL) { delete groupMap; } delete taxaSum;
            m->mothurRemove(tempTaxonomyFile);
            
            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
			
		}
        delete classify;
        
        m->mothurOutEndLine();
        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
        m->mothurOutEndLine();
		
		//set taxonomy file as new current taxonomyfile
		string current = "";
		itTypes = outputTypes.find("taxonomy");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
		}
		
		current = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
		}
		
		
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifySeqsCommand", "execute");
		exit(1);
	}
}