Example #1
0
//**********************************************************************************************************************
int GetSeqsCommand::readCount(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
		map<string, string> variables; 
		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
        variables["[extension]"] = m->getExtension(countfile);
		string outputFileName = getOutputFileName("count", variables);
		
		ofstream out;
		m->openOutputFile(outputFileName, out);
		
		ifstream in;
		m->openInputFile(countfile, in);
		
		bool wroteSomething = false;
		int selectedCount = 0;
		
        string headers = m->getline(in); m->gobble(in);
        out << headers << endl;
        string test = headers; vector<string> pieces = m->splitWhiteSpace(test);
        
        string name, rest; int thisTotal; rest = "";
        while (!in.eof()) {
            
            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
            
            in >> name; m->gobble(in); 
            in >> thisTotal; m->gobble(in);
            if (pieces.size() > 2) {  rest = m->getline(in); m->gobble(in);  }
            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
            
            if (names.count(name) != 0) {
                out << name << '\t' << thisTotal << '\t' << rest << endl;
                wroteSomething = true;
                selectedCount+= thisTotal;
            }
        }
        in.close();
		out.close();
        
        //check for groups that have been eliminated
        CountTable ct;
        if (ct.testGroups(outputFileName)) {
            ct.readTable(outputFileName, true, false);
            ct.printTable(outputFileName);
        }
		
		if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
		outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "GetSeqsCommand", "readCount");
		exit(1);
	}
}
SequenceCountParser::SequenceCountParser(string countfile, string fastafile) {
	try {
		
		m = MothurOut::getInstance();
		
		//read count file
		CountTable countTable;
		countTable.readTable(countfile, true, false);
		
		//initialize maps
		namesOfGroups = countTable.getNamesOfGroups();
		for (int i = 0; i < namesOfGroups.size(); i++) {
			vector<Sequence> temp;
			map<string, int> tempMap;
			seqs[namesOfGroups[i]] = temp;
			countTablePerGroup[namesOfGroups[i]] = tempMap;
		}
		
		//read fasta file making sure each sequence is in the group file
		ifstream in;
		m->openInputFile(fastafile, in);
		
        int fastaCount = 0;
		while (!in.eof()) {
			
			if (m->control_pressed) { break; }
			
			Sequence seq(in); m->gobble(in);
            fastaCount++;
            if (m->debug) { if((fastaCount) % 1000 == 0){	m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n.");	} }
			
            if (seq.getName() != "") {
				
                allSeqsMap[seq.getName()] = seq.getName();
                vector<int> groupCounts = countTable.getGroupCounts(seq.getName());
                
                for (int i = 0; i < namesOfGroups.size(); i++) {
                    if (groupCounts[i] != 0) {
                        seqs[namesOfGroups[i]].push_back(seq);	
                        countTablePerGroup[namesOfGroups[i]][seq.getName()] = groupCounts[i];
                    }
                }
			}
		}
		in.close();					
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
		exit(1);
	}
}
Example #3
0
//**********************************************************************************************************************
int ListSeqsCommand::readCount(){
	try {
		CountTable ct;
		ct.readTable(countfile, false, false);
        
        if (m->control_pressed) { return 0; }
        
        names = ct.getNamesOfSeqs();
        
        return 0;
        
	}
	catch(exception& e) {
		m->errorOut(e, "ListSeqsCommand", "readCount");
		exit(1);
	}
}
Example #4
0
//**********************************************************************************************************************
int SharedCommand::createSharedFromCount() {
    try {
        //getting output filename
        string filename = countfile;
        if (outputDir == "") { outputDir += util.hasPath(filename); }
        
        map<string, string> variables;
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
        filename = getOutputFileName("shared",variables);
        outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
        
        ofstream out; bool printHeaders = true;
        util.openOutputFile(filename, out);
        
        CountTable ct;  ct.readTable(countfile, true, false);
        
        map<string, string> seqNameToOtuName;
        SharedRAbundVectors* lookup = ct.getShared(Groups, seqNameToOtuName);
        lookup->setLabels(*labels.begin());
        lookup->print(out, printHeaders);
        
        out.close();
        delete lookup;
        
        string mapFilename = getOutputFileName("map",variables);
        outputNames.push_back(mapFilename); outputTypes["map"].push_back(mapFilename);
        ofstream outMap;
        util.openOutputFile(mapFilename, outMap);
        
        for (map<string, string>::iterator it = seqNameToOtuName.begin(); it != seqNameToOtuName.end(); it++) { outMap << it->first << '\t' << it->second << endl; }
        outMap.close();
        
        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "SharedCommand", "createSharedFromCount");
        exit(1);
    }
}
Example #5
0
int SeqSummaryCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//set current fasta to fastafile
		m->setFastaFile(fastafile);
		
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
		string summaryFile = getOutputFileName("summary",variables);
				
		int numSeqs = 0;
		
		vector<int> startPosition;
		vector<int> endPosition;
		vector<int> seqLength;
		vector<int> ambigBases;
		vector<int> longHomoPolymer;
		
		if (namefile != "") { nameMap = m->readNames(namefile); }
        else if (countfile != "") {
            CountTable ct;
            ct.readTable(countfile, false, false);
            nameMap = ct.getNameMap();
        }
		
		if (m->control_pressed) { return 0; }
			
#ifdef USE_MPI	
				int pid, numSeqsPerProcessor; 
				int tag = 2001;
				int startTag = 1; int endTag = 2; int lengthTag = 3; int baseTag = 4; int lhomoTag = 5;
				int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
				vector<unsigned long long> MPIPos;
				
				MPI_Status status; 
				MPI_Status statusOut;
				MPI_File inMPI; 
				MPI_File outMPI; 
				MPI_Comm_size(MPI_COMM_WORLD, &processors);
				MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
							
				char tempFileName[1024];
				strcpy(tempFileName, fastafile.c_str());
				
				char sumFileName[1024];
				strcpy(sumFileName, summaryFile.c_str());
		
				MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
				MPI_File_open(MPI_COMM_WORLD, sumFileName, outMode, MPI_INFO_NULL, &outMPI);
				
				if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); return 0;  }
				
				if (pid == 0) { //you are the root process
						//print header
						string outputString = "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs\n";	
						int length = outputString.length();
						char* buf2 = new char[length];
						memcpy(buf2, outputString.c_str(), length);
					
						MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &statusOut);
						delete buf2;
						
						MPIPos = m->setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
					
						for(int i = 1; i < processors; i++) { 
							MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
							MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
						}
						
						//figure out how many sequences you have to do
						numSeqsPerProcessor = numSeqs / processors;
						int startIndex =  pid * numSeqsPerProcessor;
						if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
						
						//do your part
						MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
						
				}else { //i am the child process
			
					MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
					MPIPos.resize(numSeqs+1);
					MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
				
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
				
					//do your part
					MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
				}
				
				MPI_File_close(&inMPI);
				MPI_File_close(&outMPI);
				MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
				
				if (pid == 0) {
					//get the info from the child processes
					for(int i = 1; i < processors; i++) { 
						int size;
						MPI_Recv(&size, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);

						vector<int> temp; temp.resize(size+1);
						
						for(int j = 0; j < 5; j++) { 
						
							MPI_Recv(&temp[0], (size+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status); 
							int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what count this is for
							
							if (receiveTag == startTag) { 
								for (int k = 0; k < size; k++) {		startPosition.push_back(temp[k]);	}
							}else if (receiveTag == endTag) { 
								for (int k = 0; k < size; k++) {		endPosition.push_back(temp[k]);	}
							}else if (receiveTag == lengthTag) { 
								for (int k = 0; k < size; k++) {		seqLength.push_back(temp[k]);	}
							}else if (receiveTag == baseTag) { 
								for (int k = 0; k < size; k++) {		ambigBases.push_back(temp[k]);	}
							}else if (receiveTag == lhomoTag) { 
								for (int k = 0; k < size; k++) {		longHomoPolymer.push_back(temp[k]);	}
							}
						} 
					}

				}else{
				
					//send my counts
					int size = startPosition.size();
					MPI_Send(&size, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
					
					startPosition.push_back(startTag);
					int ierr = MPI_Send(&(startPosition[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					endPosition.push_back(endTag);
					ierr = MPI_Send (&(endPosition[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					seqLength.push_back(lengthTag);
					ierr = MPI_Send(&(seqLength[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					ambigBases.push_back(baseTag);
					ierr = MPI_Send(&(ambigBases[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					longHomoPolymer.push_back(lhomoTag);
					ierr = MPI_Send(&(longHomoPolymer[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
				}
				
				MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
			vector<unsigned long long> positions; 
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				positions = m->divideFile(fastafile, processors);
				for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
			#else
				positions = m->setFilePosFasta(fastafile, numSeqs); 
                if (positions.size() < processors) { processors = positions.size(); }
		
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			#endif
			

			if(processors == 1){
				numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
			}else{
				numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
			}
			
			if (m->control_pressed) {  return 0; }
#endif
			
		#ifdef USE_MPI
			if (pid == 0) { 
		#endif
		
		sort(startPosition.begin(), startPosition.end());
		sort(endPosition.begin(), endPosition.end());
		sort(seqLength.begin(), seqLength.end());
		sort(ambigBases.begin(), ambigBases.end());
		sort(longHomoPolymer.begin(), longHomoPolymer.end());
		int size = startPosition.size();
		
		//find means
		unsigned long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
		meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
		for (int i = 0; i < size; i++) {
			meanStartPosition += startPosition[i];
			meanEndPosition += endPosition[i];
			meanSeqLength += seqLength[i];
			meanAmbigBases += ambigBases[i];
			meanLongHomoPolymer += longHomoPolymer[i];
		}
                
        double meanstartPosition, meanendPosition, meanseqLength, meanambigBases, meanlongHomoPolymer;
                
		meanstartPosition = meanStartPosition / (double) size; meanendPosition = meanEndPosition /(double) size; meanlongHomoPolymer = meanLongHomoPolymer / (double) size; meanseqLength = meanSeqLength / (double) size; meanambigBases = meanAmbigBases /(double) size;
				
		int ptile0_25	= int(size * 0.025);
		int ptile25		= int(size * 0.250);
		int ptile50		= int(size * 0.500);
		int ptile75		= int(size * 0.750);
		int ptile97_5	= int(size * 0.975);
		int ptile100	= size - 1;
		
		//to compensate for blank sequences that would result in startPosition and endPostion equalling -1
		if (startPosition[0] == -1) {  startPosition[0] = 0;	}
		if (endPosition[0] == -1)	{  endPosition[0] = 0;		}
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs"); m->mothurOutEndLine();
		m->mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0]) + "\t" + toString(1)); m->mothurOutEndLine();
		m->mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25]) + "\t" + toString(ptile0_25+1)); m->mothurOutEndLine();
		m->mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25]) + "\t" + toString(ptile25+1)); m->mothurOutEndLine();
		m->mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50]) + "\t" + toString(ptile50+1)); m->mothurOutEndLine();
		m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75]) + "\t" + toString(ptile75+1)); m->mothurOutEndLine();
		m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5]) + "\t" + toString(ptile97_5+1)); m->mothurOutEndLine();
		m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
		m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine();

		if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
		else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(summaryFile); m->mothurOutEndLine();	outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
		m->mothurOutEndLine();
		
		#ifdef USE_MPI
			}
		#endif

        //set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("summary");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSummaryFile(current); }
		}
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SeqSummaryCommand", "execute");
		exit(1);
	}
}
int RemoveGroupsCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//get groups you want to remove
		if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups);  }
		
		if (groupfile != "") {
			groupMap = new GroupMap(groupfile);
			groupMap->readMap();
			
			//make sure groups are valid
			//takes care of user setting groupNames that are invalid or setting groups=all
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			vector<string> checkedGroups;
            for (int i = 0; i < Groups.size(); i++) {
                if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
                else {  m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
            }
            
            if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
			else {
                Groups = checkedGroups;
                m->setGroups(Groups);
            }
            
			//fill names with names of sequences that are from the groups we want to remove 
			fillNames();
			
			delete groupMap;
		}else if (countfile != ""){
            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
            }
            CountTable ct;
            ct.readTable(countfile, true, false);
            if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
            
            vector<string> gNamesOfGroups = ct.getNamesOfGroups();
            SharedUtil util;
            util.setGroups(Groups, gNamesOfGroups);
            vector<string> namesOfSeqs = ct.getNamesOfSeqs();
            sort(Groups.begin(), Groups.end());
            
            for (int i = 0; i < namesOfSeqs.size(); i++) {
                vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
                if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
                    names.insert(namesOfSeqs[i]);
                }
            }
        }

				
		if (m->control_pressed) { return 0; }
		
		//read through the correct file and output lines you want to keep
		if (namefile != "")			{		readName();		}
		if (fastafile != "")		{		readFasta();	}
		if (groupfile != "")		{		readGroup();	}
        if (countfile != "")		{		readCount();	}
		if (listfile != "")			{		readList();		}
		if (taxfile != "")			{		readTax();		}
		if (sharedfile != "")		{		readShared();	}
        if (designfile != "")		{		readDesign();	}
		
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }
				
		if (outputNames.size() != 0) {
			m->mothurOutEndLine();
			m->mothurOut("Output File names: "); m->mothurOutEndLine();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
			m->mothurOutEndLine();
			
			//set fasta file as new current fastafile
			string current = "";
			itTypes = outputTypes.find("fasta");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
			}
			
			itTypes = outputTypes.find("name");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
			}
			
			itTypes = outputTypes.find("group");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
			}
			
			itTypes = outputTypes.find("list");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
			}
			
			itTypes = outputTypes.find("taxonomy");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
			}
			
			itTypes = outputTypes.find("shared");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
			}
            
            itTypes = outputTypes.find("design");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
			}
            
            itTypes = outputTypes.find("count");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
			}
		}
		
		return 0;		
	}
	
	catch(exception& e) {
		m->errorOut(e, "RemoveGroupsCommand", "execute");
		exit(1);
	}
}
Example #7
0
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
	try {
				
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else { thisLabel = *labels.begin(); }
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want
		if (thisLabel != "") { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				list = input.getListVector(lastLabel); 
			}
		}
        
        string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[extension]"] = m->getExtension(listfile);
        variables["[tag]"] = list->getLabel();
		string outputFileName = getOutputFileName("list", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
		string outputGroupFileName = getOutputFileName("group", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
        variables["[extension]"] = m->getExtension(countfile);
        string outputCountFileName = getOutputFileName("count", variables);
        
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;

		
		//if groupfile is given then use it
		GroupMap* groupMap;
        CountTable ct;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}else if (countfile != "") {
            ct.readTable(countfile, true, false);
            if (ct.hasGroupInfo()) {
                vector<string> namesGroups = ct.getNamesOfGroups();
                SharedUtil util;
                util.setGroups(Groups, namesGroups);
            }
        }
		
		
		if (list != NULL) {
            
            vector<string> binLabels = list->getLabels();
            vector<string> newLabels;
            
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
                int binsize = names.size();
				
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames; binsize = names.size();
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}else if (countfile != "") {
					saveBinNames = "";
                    binsize = 0;
					for(int k = 0; k < names.size(); k++) {
                        if (ct.hasGroupInfo()) {
                            vector<string> thisSeqsGroups = ct.getGroups(names[k]);
                            
                            int thisSeqsCount = 0;
                            for (int n = 0; n < thisSeqsGroups.size(); n++) {
                                if (m->inUsersGroups(thisSeqsGroups[n], Groups)) {
                                    thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]);
                                }
                            }
                            binsize += thisSeqsCount;
                            //if you don't have any seqs from the groups the user wants, then remove you.
                            if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); }
                            else { saveBinNames += names[k] + ","; }
                        }else {
                            binsize += ct.getNumSeqs(names[k]); 
                            saveBinNames += names[k] + ",";
                        }
					}
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
                }

				if (binsize > nseqs) { //keep bin
					newList.push_back(saveBinNames);
                    newLabels.push_back(binLabels[i]);
					if (groupfile != "") {  for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }  }
                    else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) {  ct.remove(newGroupFile[k]); } }  
				}else {  if (countfile != "") {  for(int k = 0; k < names.size(); k++) {  ct.remove(names[k]); } }  }
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.setLabels(newLabels);
                newList.printHeaders(out);
                newList.print(out);
			}
		}	
		
		out.close();
		if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
        if (countfile != "") { 
            if (ct.hasGroupInfo()) {
                vector<string> allGroups = ct.getNamesOfGroups();
                for (int i = 0; i < allGroups.size(); i++) {
                    if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); }
                }

            }
            ct.printTable(outputCountFileName);
            outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); 
        }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}
Example #8
0
int ClusterDoturCommand::execute(){
	try {
	
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
        
        ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
        
        NameAssignment* nameMap = NULL;
        CountTable* ct = NULL;
        map<string, int> counts;
        if(namefile != "") {	
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
            cluster->readPhylipFile(phylipfile, nameMap);
            delete nameMap;
		}else if (countfile != "") {
            ct = new CountTable();
            ct->readTable(countfile, false, false);
            cluster->readPhylipFile(phylipfile, ct);
            counts = ct->getNameMap();
            delete ct;
        }else {
            cluster->readPhylipFile(phylipfile, nameMap);
        }
        tag = cluster->getTag();
        
		if (m->getControl_pressed()) { delete cluster; return 0; }
		
		list = cluster->getListVector();
		rabund = cluster->getRAbundVector();
								
		if (outputDir == "") { outputDir += util.hasPath(phylipfile); }
		fileroot = outputDir + util.getRootName(util.getSimpleName(phylipfile));
			
        map<string, string> variables; 
        variables["[filename]"] = fileroot;
        variables["[clustertag]"] = tag;
        string sabundFileName = getOutputFileName("sabund", variables);
        string rabundFileName = getOutputFileName("rabund", variables);
        //if (countfile != "") { variables["[tag2]"] = "unique_list"; }
        string listFileName = getOutputFileName("list", variables);
        
        if (countfile == "") {
            util.openOutputFile(sabundFileName,	sabundFile);
            util.openOutputFile(rabundFileName,	rabundFile);
            outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
            outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
            
        }
		util.openOutputFile(listFileName,	listFile);
        outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
		
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		oldRAbund = *rabund;
		oldList = *list;
        bool printHeaders = true;
		
        int estart = time(NULL); int loop = 0;
	
		while ((cluster->getSmallDist() <= cutoff) && (cluster->getNSeqs() > 1)){
			if (m->getControl_pressed()) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close();  util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); }
                listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear();  return 0;  }
		
			cluster->update(cutoff);
	
			float dist = cluster->getSmallDist();
			float rndDist = util.ceilDist(dist, precision);
            
            //cout << loop << '\t' << dist << '\t' << oldList.getNumBins() << endl; loop++;

			if(previousDist <= 0.0000 && dist != previousDist)  { printData("unique", counts, printHeaders);                                }
			else if(rndDist != rndPreviousDist)                 { printData(toString(rndPreviousDist,  length-1), counts, printHeaders);    }
		
			previousDist = dist;
			rndPreviousDist = rndDist;
			oldRAbund = *rabund;
			oldList = *list;
		}
	
		if(previousDist <= 0.0000)          { printData("unique", counts, printHeaders);                            }
		else if(rndPreviousDist<cutoff)     { printData(toString(rndPreviousDist, length-1), counts, printHeaders); }
		
        if (countfile == "") {
            sabundFile.close();
            rabundFile.close();
        }
		listFile.close();
		
		delete cluster;  delete list; delete rabund;
		
		//set list file as new current listfile
		string currentName = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } }
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } }
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } }
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClusterDoturCommand", "execute");
		exit(1);
	}
}
Example #9
0
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) { m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n"); m->setControl_pressed(true); return 0; }
        
        //read countTable
        countTable.readTable(countfile, true, false);
        
        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }
        
        
        vector<string> dnamesGroups = designMap->getNamesGroups();
        
        //sanity check
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (int i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
            
        }
        if (error) { m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n"); m->setControl_pressed(true); return 0; }
        
        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (int i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }
        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();
        
        //create new table
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap;
        for (int i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        treatments = newTable.getNamesOfGroups();
        
        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (int i = 0; i < namesOfSeqs.size(); i++) {
            
            if (m->getControl_pressed()) { break; }
            
            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;
            
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }
        
            //create new counts for seq for new table
            vector<int> newCounts; int totalAbund = 0;
            for (int j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund);  //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }
            
            //add seq to new table
            if(totalAbund == 0) {
                namesToRemove.insert(namesOfSeqs[i]);
            }else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }
        
        if (error) { m->setControl_pressed(true); return 0; }
        
        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);
            
            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) {  out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;
            
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);
            
            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();
            
            map<string, vector<string> > filenames = removeCommand->getOutputFiles();
            
            delete removeCommand;
            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            
            util.mothurRemove(accnosFile);
        }
    
        string thisOutputDir = outputDir;
        if (outputDir == "") {  thisOutputDir += util.hasPath(countfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
        
        newTable.printTable(outputFileName);
        
        return 0;
        
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
Example #10
0
int TreeGroupCommand::execute(){
	try {
	
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
		if (format == "sharedfile") {
			InputData input(sharedfile, "sharedfile", Groups);
			SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
			lastLabel = lookup->getLabel();
            Groups = lookup->getNamesGroups();
			
            if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command.\n");   return 0; }
			
			//create treemap class from groupmap for tree class to use
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < Groups.size(); i++) {
                nameMap.insert(Groups[i]);
                gps.insert(Groups[i]);
                groupMap[Groups[i]] = Groups[i];
            }
            ct.createTable(nameMap, groupMap, gps);
			
			//fills tree names with shared files groups
			Treenames = lookup->getNamesGroups();
            
			if (m->getControl_pressed()) { return 0; }
			
			//create tree file
			makeSimsShared(input, lookup, ct);
			
			if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);  }  return 0; }
		}else{
			//read in dist file
			filename = inputfile;
            
            ReadMatrix* readMatrix;
			if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }	
			else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
				
			readMatrix->setCutoff(cutoff);
	
            ListVector* list;
            if(namefile != ""){	
                NameAssignment* nameMap = new NameAssignment(namefile);
                nameMap->readMap();
                readMatrix->read(nameMap);
                list = readMatrix->getListVector();
                delete nameMap;
            }else if (countfile != "") {
                CountTable* ct = new CountTable();
                ct->readTable(countfile, true, false);
                readMatrix->read(ct);
                list = readMatrix->getListVector();
                delete ct;
            }else { NameAssignment* nameMap = NULL; readMatrix->read(nameMap); list = readMatrix->getListVector(); }

			SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
			Treenames.clear();
            
			//make treemap
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < list->getNumBins(); i++) {
                string bin = list->get(i);
                nameMap.insert(bin); 
                gps.insert(bin); 
                groupMap[bin] = bin;
                Treenames.push_back(bin);
            }
            ct.createTable(nameMap, groupMap, gps);
			vector<string> namesGroups = ct.getNamesOfGroups();
			
			if (m->getControl_pressed()) { return 0; }
			
			vector< vector<double> > matrix = makeSimsDist(dMatrix, list->getNumBins());
            delete readMatrix;
            delete dMatrix;
			
			if (m->getControl_pressed()) { return 0; }

			//create a new filename
            map<string, string> variables; 
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
			string outputFile = getOutputFileName("tree",variables);	
			outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
				
            Tree* newTree = new Tree(&ct, matrix, Treenames);
            if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
            else { newTree->assembleTree(); }
 
            if (newTree != NULL) {  newTree->createNewickFile(outputFile);  delete newTree; }
			
			if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n");
		}
				
		//set tree file as new current treefile
		string currentName = "";
		itTypes = outputTypes.find("tree");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); }
		}
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "execute");
		exit(1);
	}
}
Example #11
0
int ClusterCommand::execute(){
	try {
	
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//phylip file given and cutoff not given - use cluster.classic because it uses less memory and is faster
		if ((format == "phylip") && (cutoff > 10.0)) {
			m->mothurOutEndLine(); m->mothurOut("You are using a phylip file and no cutoff.  I will run cluster.classic to save memory and time."); m->mothurOutEndLine();
			
			//run unique.seqs for deconvolute results
			string inputString = "phylip=" + distfile;
			if (namefile != "") { inputString += ", name=" + namefile; }
            else if (countfile != "") { inputString += ", count=" + countfile; }
			inputString += ", precision=" + toString(precision);
			inputString += ", method=" + method;
			if (hard)	{ inputString += ", hard=T";	}
			else		{ inputString += ", hard=F";	}
			if (sim)	{ inputString += ", sim=T";		}
			else		{ inputString += ", sim=F";		}

			
			m->mothurOutEndLine(); 
			m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine(); 
			m->mothurOut("Running command: cluster.classic(" + inputString + ")"); m->mothurOutEndLine(); 
			
			Command* clusterClassicCommand = new ClusterDoturCommand(inputString);
			clusterClassicCommand->execute();
			delete clusterClassicCommand;
			
			m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine();  

			return 0;
		}
		
		ReadMatrix* read;
		if (format == "column") { read = new ReadColumnMatrix(columnfile, sim); }	//sim indicates whether its a similarity matrix
		else if (format == "phylip") { read = new ReadPhylipMatrix(phylipfile, sim); }
		
		read->setCutoff(cutoff);
		
		NameAssignment* nameMap = NULL;
        CountTable* ct = NULL;
        map<string, int> counts;
		if(namefile != ""){	
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
            read->read(nameMap);
		}else if (countfile != "") {
            ct = new CountTable();
            ct->readTable(countfile, false, false);
            read->read(ct);
            counts = ct->getNameMap();
        }else { read->read(nameMap); }
		
		list = read->getListVector();
		matrix = read->getDMatrix();
        
		if(countfile != "") {
            rabund = new RAbundVector();
            createRabund(ct, list, rabund); //creates an rabund that includes the counts for the unique list
            delete ct;
        }else { rabund = new RAbundVector(list->getRAbundVector()); }
		delete read;
		
		if (m->control_pressed) { //clean up
			delete list; delete matrix; delete rabund; if(countfile == ""){rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
			listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0;
		}
		
		//create cluster
		if (method == "furthest")	{	cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); }
		else if(method == "nearest"){	cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); }
		else if(method == "average"){	cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust);	}
		else if(method == "weighted"){	cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method, adjust);	}
		tag = cluster->getTag();
		
		if (outputDir == "") { outputDir += m->hasPath(distfile); }
		fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
		
        map<string, string> variables; 
        variables["[filename]"] = fileroot;
        variables["[clustertag]"] = tag;
        string sabundFileName = getOutputFileName("sabund", variables);
        string rabundFileName = getOutputFileName("rabund", variables);
        if (countfile != "") { variables["[tag2]"] = "unique_list"; }
        string listFileName = getOutputFileName("list", variables);
        
        if (countfile == "") {
            m->openOutputFile(sabundFileName,	sabundFile);
            m->openOutputFile(rabundFileName,	rabundFile);
            outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
            outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);

        }
		m->openOutputFile(listFileName,	listFile);
        outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
        list->printHeaders(listFile);
		
		time_t estart = time(NULL);
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		oldRAbund = *rabund;
		oldList = *list;

		print_start = true;
		start = time(NULL);
		loops = 0;
		double saveCutoff = cutoff;
		
		while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){  
		
			if (m->control_pressed) { //clean up
				delete list; delete matrix; delete rabund; delete cluster;
				if(countfile == "") {rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
                listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0;
			}
		
			if (print_start && m->isTrue(timing)) {
				m->mothurOut("Clustering (" + tag + ") dist " + toString(matrix->getSmallDist()) + "/" 
					+ toString(m->roundDist(matrix->getSmallDist(), precision)) 
					+ "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
				cout.flush();
				print_start = false;
			}

			loops++;

			cluster->update(cutoff);
            
            float dist = matrix->getSmallDist();
			float rndDist;
			if (hard) {
				rndDist = m->ceilDist(dist, precision); 
			}else{
				rndDist = m->roundDist(dist, precision); 
			}

			if(previousDist <= 0.0000 && dist != previousDist){
				printData("unique", counts);
			}
			else if(rndDist != rndPreviousDist){
				printData(toString(rndPreviousDist,  length-1), counts);
			}
		
			previousDist = dist;
			rndPreviousDist = rndDist;
			oldRAbund = *rabund;
			oldList = *list;
		}

		if (print_start && m->isTrue(timing)) {
			m->mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) 
					 + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
			cout.flush();
	 		print_start = false;
		}
		
		if(previousDist <= 0.0000){
			printData("unique", counts);
		}
		else if(rndPreviousDist<cutoff){
			printData(toString(rndPreviousDist, length-1), counts);
		}
		
		delete matrix;
		delete list;
		delete rabund;
		delete cluster;
        if (countfile == "") {
            sabundFile.close();
            rabundFile.close();
        }
		listFile.close();
	
		if (saveCutoff != cutoff) { 
			if (hard)	{  saveCutoff = m->ceilDist(saveCutoff, precision);	}
			else		{	saveCutoff = m->roundDist(saveCutoff, precision);  }

			m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
		}
		
		//set list file as new current listfile
		string current = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
		}
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
		}
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		
		//if (m->isTrue(timing)) {
			m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
		//}
		
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClusterCommand", "execute");
		exit(1);
	}
}
Example #12
0
int ClassifySeqsCommand::execute(){
	try {
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
        
        string outputMethodTag = method;
		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());	}
		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, util.getRandomNumber(), current->getVersion());				}
        else if(method == "zap"){	
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
        }
		else {
			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
			m->mothurOutEndLine();
			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());
		}
		
		if (m->getControl_pressed()) { delete classify; return 0; }
				
        m->mothurOut("Classifying sequences from " + fastafile + " ...\n" );
        
        string baseTName = util.getSimpleName(taxonomyFileName);
        
        //set rippedTaxName to
        string RippedTaxName = "";
        bool foundDot = false;
        for (int i = baseTName.length()-1; i >= 0; i--) {
            if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
            else if (foundDot && (baseTName[i] == '.')) {  break; }
            else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
        }
        
        if (outputDir == "") { outputDir += util.hasPath(fastafile); }
        map<string, string> variables;
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
        variables["[tag]"] = RippedTaxName;
        variables["[tag2]"] = outputMethodTag;
        string newTaxonomyFile = getOutputFileName("taxonomy", variables);
        string newaccnosFile = getOutputFileName("accnos", variables);
        string tempTaxonomyFile = outputDir + util.getRootName(util.getSimpleName(fastafile)) + "taxonomy.temp";
        string taxSummary = getOutputFileName("taxsummary", variables);
        
        if ((method == "knn") && (search == "distance")) {
            string DistName = getOutputFileName("matchdist", variables);
            classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
        }
        
        outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
        outputNames.push_back(taxSummary);	outputTypes["taxsummary"].push_back(taxSummary);
        
        long start = time(NULL);
        int numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastafile);
        
        if (!util.isBlank(newaccnosFile)) { m->mothurOut("\n[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences.\n");
            outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
        }else { util.mothurRemove(newaccnosFile); }
        
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.\n\n");
        start = time(NULL);
        
        //read namefile
        map<string, vector<string> > nameMap;
        map<string,  vector<string> >::iterator itNames;
        if(namefile != "") {
            m->mothurOut("Reading " + namefile + "..."); cout.flush();
            nameMap.clear(); //remove old names
            util.readNames(namefile, nameMap);
            m->mothurOut("  Done.\n");
        }
        
        //output taxonomy with the unclassified bins added
        ifstream inTax;
        util.openInputFile(newTaxonomyFile, inTax);
        
        ofstream outTax;
        string unclass = newTaxonomyFile + ".unclass.temp";
        util.openOutputFile(unclass, outTax);
        
        //get maxLevel from phylotree so you know how many 'unclassified's to add
        int maxLevel = classify->getMaxLevel();
        
        //read taxfile - this reading and rewriting is done to preserve the confidence scores.
        string name, taxon;
        GroupMap* groupMap = NULL;
        CountTable* ct = NULL;
        PhyloSummary* taxaSum;
        
        if (hasCount) {
            ct = new CountTable();
            ct->readTable(countfile, true, false);
            taxaSum = new PhyloSummary(ct, relabund, printlevel);
        }else {
            if (groupfile != "") {  groupMap = new GroupMap(groupfile); groupMap->readMap(); }
            taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
        }
        
        while (!inTax.eof()) {
            if (m->getControl_pressed()) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);	} delete classify; return 0; }
            
            inTax >> name; util.gobble(inTax);
            taxon = util.getline(inTax); util.gobble(inTax);
            
            string newTax = util.addUnclassifieds(taxon, maxLevel, probs);
            
            outTax << name << '\t' << newTax << endl;
            
            if (namefile != "") {
                itNames = nameMap.find(name);
                
                if (itNames == nameMap.end()) {
                    m->mothurOut(name + " is not in your name file please correct.\n");  exit(1);
                }else{
                    //add it as many times as there are identical seqs
                    for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); }
                    itNames->second.clear();
                    nameMap.erase(itNames->first);
                }
            }else { taxaSum->addSeqToTree(name, newTax); }
        }
        inTax.close();
        outTax.close();
        
        util.mothurRemove(newTaxonomyFile);
        util.renameFile(unclass, newTaxonomyFile);
        
        if (m->getControl_pressed()) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);	} delete classify; return 0; }
        
        //print summary file
        ofstream outTaxTree;
        util.openOutputFile(taxSummary, outTaxTree);
        taxaSum->print(outTaxTree, output);
        outTaxTree.close();
        
        if (ct != NULL) { delete ct; }
        if (groupMap != NULL) { delete groupMap; } delete taxaSum;
        util.mothurRemove(tempTaxonomyFile);
        delete classify;
        
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences.\n\n");

        m->mothurOut("\nOutput File Names: \n");
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
        m->mothurOutEndLine();
		
		//set taxonomy file as new current taxonomyfile
		string currentName = "";
		itTypes = outputTypes.find("taxonomy");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } }
		
		currentName = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifySeqsCommand", "execute");
		exit(1);
	}
}
Example #13
0
int SeqSummaryCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
        int start = time(NULL);
        
		//set current fasta to fastafile
		m->setFastaFile(fastafile);
		
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
		string summaryFile = getOutputFileName("summary",variables);
				
		long long numSeqs = 0;
        long long size = 0;
        long long numUniques = 0;
		map<int, long long> startPosition;
		map<int, long long> endPosition;
		map<int, long long> seqLength;
		map<int, long long> ambigBases;
		map<int, long long> longHomoPolymer;
		
        if (namefile != "") { nameMap = m->readNames(namefile); numUniques = nameMap.size(); }
        else if (countfile != "") {
            CountTable ct;
            ct.readTable(countfile, false, false);
            nameMap = ct.getNameMap();
            size = ct.getNumSeqs();
            numUniques = ct.getNumUniqueSeqs();
        }
		
		if (m->control_pressed) { return 0; }
			

			vector<unsigned long long> positions; 
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				positions = m->divideFile(fastafile, processors);
				for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
			#else
				positions = m->setFilePosFasta(fastafile, numSeqs); 
                if (numSeqs < processors) { processors = numSeqs; }
		
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			#endif
			

			if(processors == 1){
				numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
			}else{
				numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
			}
			
			if (m->control_pressed) {  return 0; }
			
		
        
        //set size
        if (countfile != "") {}//already set
        else if (namefile == "") { size = numSeqs; }
        else { for (map<int, long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++) { size += it->second; } }
        
        if ((namefile != "") || (countfile != "")) {
            string type = "count";
            if (namefile != "") { type = "name"; }
            if (numSeqs != numUniques) { // do fasta and name/count files match
                m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(numUniques) + " unique sequences, but your fasta file contains " + toString(numSeqs) + ". File mismatch detected, quitting command.\n"); m->control_pressed = true;
            }
        }
        
        if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
        
        long long ptile0_25	= 1+(long long)(size * 0.025); //number of sequences at 2.5%
        long long ptile25		= 1+(long long)(size * 0.250); //number of sequences at 25%
        long long ptile50		= 1+(long long)(size * 0.500);
        long long ptile75		= 1+(long long)(size * 0.750);
        long long ptile97_5	= 1+(long long)(size * 0.975);
        long long ptile100	= (long long)(size);
        vector<int> starts; starts.resize(7,0); vector<int> ends; ends.resize(7,0); vector<int> ambigs; ambigs.resize(7,0); vector<int> lengths; lengths.resize(7,0); vector<int> homops; homops.resize(7,0);
        
		//find means
		long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
        meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
        //minimum
        if ((startPosition.begin())->first == -1) { starts[0] = 0; }
        else {starts[0] = (startPosition.begin())->first; }
        long long totalSoFar = 0;
        //set all values to min
        starts[1] = starts[0]; starts[2] = starts[0]; starts[3] = starts[0]; starts[4] = starts[0]; starts[5] = starts[0];
        int lastValue = 0;
        for (map<int, long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++) {
            int value = it->first; if (value == -1) { value = 0; }
            meanStartPosition += (value*it->second);
            totalSoFar += it->second;
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  starts[1] = value;   } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) { starts[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  starts[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  starts[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  starts[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  starts[6] = value; } //save value
            lastValue = totalSoFar;
        }
        starts[6] = (startPosition.rbegin())->first;
        
        if ((endPosition.begin())->first == -1) { ends[0] = 0; }
        else {ends[0] = (endPosition.begin())->first; }
        totalSoFar = 0;
        //set all values to min
        ends[1] = ends[0]; ends[2] = ends[0]; ends[3] = ends[0]; ends[4] = ends[0]; ends[5] = ends[0];
        lastValue = 0;
        for (map<int, long long>::iterator it = endPosition.begin(); it != endPosition.end(); it++) {
            int value = it->first; if (value == -1) { value = 0; }
            meanEndPosition += (value*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  ends[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) { ends[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  ends[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  ends[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  ends[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {   ends[6] = value; } //save value
            lastValue = totalSoFar;
        }
        ends[6] = (endPosition.rbegin())->first;
        
        if ((seqLength.begin())->first == -1) { lengths[0] = 0; }
        else {lengths[0] = (seqLength.begin())->first; }
        //set all values to min
        lengths[1] = lengths[0]; lengths[2] = lengths[0]; lengths[3] = lengths[0]; lengths[4] = lengths[0]; lengths[5] = lengths[0];
        totalSoFar = 0;
        lastValue = 0;
        for (map<int, long long>::iterator it = seqLength.begin(); it != seqLength.end(); it++) {
            int value = it->first;
            meanSeqLength += (value*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  lengths[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) {   lengths[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  lengths[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  lengths[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  lengths[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  lengths[6] = value; } //save value
            lastValue = totalSoFar;
        }
        lengths[6] = (seqLength.rbegin())->first;
                
        if ((ambigBases.begin())->first == -1) { ambigs[0] = 0; }
        else {ambigs[0] = (ambigBases.begin())->first; }
        //set all values to min
        ambigs[1] = ambigs[0]; ambigs[2] = ambigs[0]; ambigs[3] = ambigs[0]; ambigs[4] = ambigs[0]; ambigs[5] = ambigs[0];
        totalSoFar = 0;
        lastValue = 0;
        for (map<int, long long>::iterator it = ambigBases.begin(); it != ambigBases.end(); it++) {
            int value = it->first;
            meanAmbigBases += (value*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  ambigs[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) {   ambigs[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  ambigs[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  ambigs[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  ambigs[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  ambigs[6] = value; } //save value
            lastValue = totalSoFar;
        }
        ambigs[6] = (ambigBases.rbegin())->first;
        
        
        if ((longHomoPolymer.begin())->first == -1) { homops[0] = 0; }
        else {homops[0] = (longHomoPolymer.begin())->first; }
        //set all values to min
        homops[1] = homops[0]; homops[2] = homops[0]; homops[3] = homops[0]; homops[4] = homops[0]; homops[5] = homops[0];
        totalSoFar = 0;
        lastValue = 0;
        for (map<int, long long>::iterator it = longHomoPolymer.begin(); it != longHomoPolymer.end(); it++) {
            int value = it->first;
            meanLongHomoPolymer += (it->first*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  homops[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) {   homops[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  homops[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  homops[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  homops[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  homops[6] = value; } //save value
            lastValue = totalSoFar;
        }
        homops[6] = (longHomoPolymer.rbegin())->first;
        		      
        double meanstartPosition, meanendPosition, meanseqLength, meanambigBases, meanlongHomoPolymer;
                
		meanstartPosition = meanStartPosition / (double) size; meanendPosition = meanEndPosition /(double) size; meanlongHomoPolymer = meanLongHomoPolymer / (double) size; meanseqLength = meanSeqLength / (double) size; meanambigBases = meanAmbigBases /(double) size;
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs"); m->mothurOutEndLine();
		m->mothurOut("Minimum:\t" + toString(starts[0]) + "\t" + toString(ends[0]) + "\t" + toString(lengths[0]) + "\t" + toString(ambigs[0]) + "\t" + toString(homops[0]) + "\t" + toString(1)); m->mothurOutEndLine();
		m->mothurOut("2.5%-tile:\t" + toString(starts[1]) + "\t" + toString(ends[1]) + "\t" + toString(lengths[1]) + "\t" + toString(ambigs[1]) + "\t" + toString(homops[1]) + "\t" + toString(ptile0_25)); m->mothurOutEndLine();
		m->mothurOut("25%-tile:\t" + toString(starts[2]) + "\t" + toString(ends[2]) + "\t" + toString(lengths[2]) + "\t" + toString(ambigs[2]) + "\t" + toString(homops[2]) + "\t" + toString(ptile25)); m->mothurOutEndLine();
		m->mothurOut("Median: \t" + toString(starts[3]) + "\t" + toString(ends[3]) + "\t" + toString(lengths[3]) + "\t" + toString(ambigs[3]) + "\t" + toString(homops[3]) + "\t" + toString(ptile50)); m->mothurOutEndLine();
		m->mothurOut("75%-tile:\t" + toString(starts[4]) + "\t" + toString(ends[4]) + "\t" + toString(lengths[4]) + "\t" + toString(ambigs[4]) + "\t" + toString(homops[4]) + "\t" + toString(ptile75)); m->mothurOutEndLine();
		m->mothurOut("97.5%-tile:\t" + toString(starts[5]) + "\t" + toString(ends[5]) + "\t" + toString(lengths[5]) + "\t" + toString(ambigs[5]) + "\t" + toString(homops[5]) + "\t" + toString(ptile97_5)); m->mothurOutEndLine();
		m->mothurOut("Maximum:\t" + toString(starts[6]) + "\t" + toString(ends[6]) + "\t" + toString(lengths[6]) + "\t" + toString(ambigs[6]) + "\t" + toString(homops[6]) + "\t" + toString(ptile100)); m->mothurOutEndLine();
		m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine();
		if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
		else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); }
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(summaryFile); m->mothurOutEndLine();	outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
		m->mothurOutEndLine();

        if ((namefile == "") && (countfile == "")) {  m->mothurOut("It took " + toString(time(NULL) - start) + " secs to summarize " + toString(numSeqs) + " sequences.\n");  }
        else{ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to summarize " + toString(size) + " sequences.\n");   }
        
        //set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("summary");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSummaryFile(current); }
		}
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SeqSummaryCommand", "execute");
		exit(1);
	}
}
Example #14
0
int OptiBlastMatrix::readBlast(){
    try {
        Utils util;
        map<string, long long> nameAssignment;
        if (namefile != "") { util.readNames(namefile, nameAssignment); }
        else if (countfile != "") {
            CountTable ct; ct.readTable(countfile, false, true);
            map<string, int> temp = ct.getNameMap();
            for (map<string, int>::iterator it = temp.begin(); it!= temp.end(); it++) {  nameAssignment[it->first] = it->second; }
        }
        else { readBlastNames(nameAssignment);  }
        int count = 0;
        for (map<string, long long>::iterator it = nameAssignment.begin(); it!= nameAssignment.end(); it++) {
            it->second = count; count++;
            nameMap.push_back(it->first);
            overlapNameMap.push_back(it->first);
        }
        
        m->mothurOut("Reading Blast File... "); cout.flush();
        
        string firstName, secondName, eScore, currentRow; currentRow = "";
        string repeatName = "";
        float distance, thisoverlap, refScore;
        float percentId;
        float numBases, mismatch, gap, startQuery, endQuery, startRef, endRef, score, lengthThisSeq;
        map<string, float> thisRowsBlastScores;
        
        ///////////////////// Read to eliminate singletons ///////////////////////
        ifstream fileHandle;
        util.openInputFile(distFile, fileHandle);
        
        map<int, int> singletonIndexSwap;
        map<int, int> blastSingletonIndexSwap;
        vector<bool> singleton; singleton.resize(nameAssignment.size(), true);
        vector<bool> overlapSingleton; overlapSingleton.resize(nameAssignment.size(), true);
        vector< map<string,float> > dists;  dists.resize(nameAssignment.size());
        
        if (!fileHandle.eof()) {
            //read in line from file
            fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
            util.gobble(fileHandle);
            
            currentRow = firstName;
            lengthThisSeq = numBases;
            repeatName = firstName + secondName;
            
            if (firstName == secondName) {   refScore = score;  }
            else{
                thisRowsBlastScores[secondName] = score;
                
                //calc overlap score
                thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty);
                
                //if there is a valid overlap, add it
                if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) {
                    //convert name to number
                    map<string,long long>::iterator itA = nameAssignment.find(firstName);
                    map<string,long long>::iterator itB = nameAssignment.find(secondName);
                    if(itA == nameAssignment.end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
                    if(itB == nameAssignment.end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }

                    int indexA = (itA->second);
                    int indexB = (itB->second);
                    overlapSingleton[indexA] = false;
                    overlapSingleton[indexB] = false;
                    blastSingletonIndexSwap[indexA] = indexA;
                    blastSingletonIndexSwap[indexB] = indexB;
                }
            }
        }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); }
        
        
        while(fileHandle){  //let's assume it's a triangular matrix...
            
            if (m->getControl_pressed()) { fileHandle.close(); return 0; }
            
            //read in line from file
            fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
            util.gobble(fileHandle);
            
            string temp = firstName + secondName; //to check if this file has repeat lines, ie. is this a blast instead of a blscreen file
            
            //if this is a new pairing
            if (temp != repeatName) {
                repeatName = temp;
                
                if (currentRow == firstName) {
                    if (firstName == secondName) {  refScore = score; }
                    else{
                        //save score
                        thisRowsBlastScores[secondName] = score;
                        
                        //calc overlap score
                        thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty);
                        
                        //if there is a valid overlap, add it
                        if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) {
                            //convert name to number
                            map<string,long long>::iterator itA = nameAssignment.find(firstName);
                            map<string,long long>::iterator itB = nameAssignment.find(secondName);
                            if(itA == nameAssignment.end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
                            if(itB == nameAssignment.end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                            int indexA = (itA->second);
                            int indexB = (itB->second);
                            overlapSingleton[indexA] = false;
                            overlapSingleton[indexB] = false;
                            blastSingletonIndexSwap[indexA] = indexA;
                            blastSingletonIndexSwap[indexB] = indexB;
                        }
                    } //end else
                }else { //end row
                    //convert blast scores to distance and add cell to sparse matrix if we can
                    map<string, float>::iterator it;
                    map<string, float>::iterator itDist;
                    for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) {
                        distance = 1.0 - (it->second / refScore);
                        
                        //do we already have the distance calculated for b->a
                        map<string,long long>::iterator itA = nameAssignment.find(currentRow);
                        map<string,long long>::iterator itB = nameAssignment.find(it->first);
                        itDist = dists[itB->second].find(itA->first);
                        
                        //if we have it then compare
                        if (itDist != dists[itB->second].end()) {
                            
                            //if you want the minimum blast score ratio, then pick max distance
                            if(minWanted) {	 distance = max(itDist->second, distance);  }
                            else{	distance = min(itDist->second, distance);  }
                            
                            //is this distance below cutoff
                            if (distance <= cutoff) {
                                int indexA = (itA->second);
                                int indexB = (itB->second);
                                singleton[indexA] = false;
                                singleton[indexB] = false;
                                singletonIndexSwap[indexA] = indexA;
                                singletonIndexSwap[indexB] = indexB;
                            }
                            //not going to need this again
                            dists[itB->second].erase(itDist);
                        }else { //save this value until we get the other ratio
                            dists[itA->second][it->first] = distance;
                        }
                    }
                    //clear out last rows info
                    thisRowsBlastScores.clear();
                    
                    currentRow = firstName;
                    lengthThisSeq = numBases;
                    
                    //add this row to thisRowsBlastScores
                    if (firstName == secondName) {   refScore = score;  }
                    else{ //add this row to thisRowsBlastScores
                        thisRowsBlastScores[secondName] = score;
                        
                        //calc overlap score
                        thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty);
                        
                        //if there is a valid overlap, add it
                        if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) {
                            //convert name to number
                            map<string,long long>::iterator itA = nameAssignment.find(firstName);
                            map<string,long long>::iterator itB = nameAssignment.find(secondName);
                            if(itA == nameAssignment.end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
                            if(itB == nameAssignment.end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                            int indexA = (itA->second);
                            int indexB = (itB->second);
                            overlapSingleton[indexA] = false;
                            overlapSingleton[indexB] = false;
                            blastSingletonIndexSwap[indexA] = indexA;
                            blastSingletonIndexSwap[indexB] = indexB;
                        }
                    }
                }//end if current row
            }//end if repeat
        }
        fileHandle.close();
        
        //convert blast scores to distance and add cell to sparse matrix if we can
        map<string, float>::iterator it;
        map<string, float>::iterator itDist;
        for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) {
            distance = 1.0 - (it->second / refScore);
            
            //do we already have the distance calculated for b->a
            map<string,long long>::iterator itA = nameAssignment.find(currentRow);
            map<string,long long>::iterator itB = nameAssignment.find(it->first);
            itDist = dists[itB->second].find(itA->first);
            
            //if we have it then compare
            if (itDist != dists[itB->second].end()) {
                
                //if you want the minimum blast score ratio, then pick max distance
                if(minWanted) {	 distance = max(itDist->second, distance);  }
                else{	distance = min(itDist->second, distance);  }
                
                //is this distance below cutoff
                if (distance <= cutoff) {
                    int indexA = (itA->second);
                    int indexB = (itB->second);
                    singleton[indexA] = false;
                    singleton[indexB] = false;
                    singletonIndexSwap[indexA] = indexA;
                    singletonIndexSwap[indexB] = indexB;
                }
                //not going to need this again
                dists[itB->second].erase(itDist);
            }else { //save this value until we get the other ratio
                dists[itA->second][it->first] = distance;
            }
        }
        //clear out info
        thisRowsBlastScores.clear();
        dists.clear();
        
        //////////////////////////////////////////////////////////////////////////
        int nonSingletonCount = 0;
        for (int i = 0; i < singleton.size(); i++) {
            if (!singleton[i]) { //if you are a singleton
                singletonIndexSwap[i] = nonSingletonCount;
                nonSingletonCount++;
            }else { singletons.push_back(nameMap[i]); }
        }
        singleton.clear();
        
        int overlapNonSingletonCount = 0;
        for (int i = 0; i < overlapSingleton.size(); i++) {
            if (!overlapSingleton[i]) { //if you are a singleton
                blastSingletonIndexSwap[i] = overlapNonSingletonCount;
                overlapNonSingletonCount++;
            }
        }
        overlapSingleton.clear();
        
        ifstream in;
        util.openInputFile(distFile, in);
        
        dists.resize(nameAssignment.size());
        closeness.resize(nonSingletonCount);
        blastOverlap.resize(overlapNonSingletonCount);
        
        map<string, string> names;
        if (namefile != "") {
            util.readNames(namefile, names);
            for (int i = 0; i < singletons.size(); i++) {
                singletons[i] = names[singletons[i]];
            }
        }
        
        m->mothurOut(" halfway ... "); cout.flush();
        
        if (!in.eof()) {
            //read in line from file
            in >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
            util.gobble(fileHandle);
            
            currentRow = firstName;
            lengthThisSeq = numBases;
            repeatName = firstName + secondName;
            
            if (firstName == secondName) {   refScore = score;  }
            else{
                //convert name to number
                map<string,long long>::iterator itA = nameAssignment.find(firstName);
                map<string,long long>::iterator itB = nameAssignment.find(secondName);
                if(itA == nameAssignment.end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
                if(itB == nameAssignment.end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                
                thisRowsBlastScores[secondName] = score;
                
                if (namefile != "") {
                    firstName = names[firstName];  //redundant names
                    secondName = names[secondName]; //redundant names
                }
                
                nameMap[singletonIndexSwap[itA->second]] = firstName;
                nameMap[singletonIndexSwap[itB->second]] = secondName;
                
                //calc overlap score
                thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty);
                
                //if there is a valid overlap, add it
                if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) {
                    int indexA = (itA->second);
                    int indexB = (itB->second);
                    
                    int newB = blastSingletonIndexSwap[indexB];
                    int newA = blastSingletonIndexSwap[indexA];
                    blastOverlap[newA].insert(newB);
                    blastOverlap[newB].insert(newA);
                    
                    overlapNameMap[newA] = firstName;
                    overlapNameMap[newB] = secondName;
                }
            }
        }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); }
Example #15
0
//**********************************************************************************************************************
int GetRAbundCommand::processList(ofstream& out){
	try {
        CountTable ct;
        ct.readTable(countfile, false, false);
        
        InputData input(inputfile, format);
        ListVector* list = input.getListVector();
        string lastLabel = list->getLabel();
        
        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        
        if (m->control_pressed) {  delete list;  return 0; }
        
        while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            
            if(allLines == 1 || labels.count(list->getLabel()) == 1){
                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                
                if (m->control_pressed) {   delete list;  return 0;  }
                
                RAbundVector* rabund = new RAbundVector();
                createRabund(ct, list, rabund);
                
                if(sorted)	{   rabund->print(out);				}
                else		{	rabund->nonSortedPrint(out);	}
                
                delete rabund;
                processedLabels.insert(list->getLabel());
                userLabels.erase(list->getLabel());
            }
            
            if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = list->getLabel();
                
                delete list;
                list = input.getListVector(lastLabel);
                
                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                
                if (m->control_pressed) {    delete list;  return 0;  }
                
                RAbundVector* rabund = new RAbundVector();
                createRabund(ct, list, rabund);
                
                if(sorted)	{   rabund->print(out);				}
                else		{	rabund->nonSortedPrint(out);	}
                
                delete rabund;
                processedLabels.insert(list->getLabel());
                userLabels.erase(list->getLabel());
                
                //restore real lastlabel to save below
                list->setLabel(saveLabel);
            }
            
            lastLabel = list->getLabel();
            
            delete list;
            list = input.getListVector();
        }
        
        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            m->mothurOut("Your file does not include the label " + *it);
            if (processedLabels.count(lastLabel) != 1) {
                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                needToRun = true;
            }else {
                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
            }
        }
        
        //run last label if you need to
        if (needToRun == true)  {
            if (list != NULL) {	delete list;	}
            list = input.getListVector(lastLabel);
            
            m->mothurOut(list->getLabel()); m->mothurOutEndLine();
            
            if (m->control_pressed) {   delete list;  return 0; }
            
            RAbundVector* rabund = new RAbundVector();
            createRabund(ct, list, rabund);
            
            if(sorted)	{   rabund->print(out);				}
            else		{	rabund->nonSortedPrint(out);	}
            
            delete rabund;
            delete list;
        }

        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "GetRAbundCommand", "processList");
		exit(1);
	}
}
Example #16
0
int DeconvoluteCommand::execute() {	
	try {
		
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
        
		//prepare filenames and open files
        map<string, string> variables; 
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
        string outNameFile = getOutputFileName("name", variables);
        string outCountFile = getOutputFileName("count", variables);
        variables["[extension]"] = util.getExtension(fastafile);
		string outFastaFile = getOutputFileName("fasta", variables);
		
		map<string, string> nameMap;
		map<string, string>::iterator itNames;
		if (namefile != "")  {
            util.readNames(namefile, nameMap);
            if (namefile == outNameFile){
                //prepare filenames and open files
                map<string, string> mvariables;
                mvariables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
                mvariables["[tag]"] = "unique";
                outNameFile = getOutputFileName("name", mvariables);
            }
        }
        CountTable ct;
        if (countfile != "")  {  
            ct.readTable(countfile, true, false);
            if (countfile == outCountFile){
                //prepare filenames and open files
                map<string, string> mvariables;
                mvariables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
                mvariables["[tag]"] = "unique";
                outCountFile = getOutputFileName("count", mvariables);   }
        }
		
		if (m->getControl_pressed()) { return 0; }
		
		ifstream in; 
		util.openInputFile(fastafile, in);
		
		ofstream outFasta;
		util.openOutputFile(outFastaFile, outFasta);
		
		map<string, string> sequenceStrings; //sequenceString -> list of names.  "atgc...." -> seq1,seq2,seq3.
		map<string, string>::iterator itStrings;
		set<string> nameInFastaFile; //for sanity checking
		set<string>::iterator itname;
		vector<string> nameFileOrder;
        CountTable newCt;
		int count = 0;
		while (!in.eof()) {
			
			if (m->getControl_pressed()) { in.close(); outFasta.close(); util.mothurRemove(outFastaFile); return 0; }
			
			Sequence seq(in);
			
			if (seq.getName() != "") {
				
				//sanity checks
				itname = nameInFastaFile.find(seq.getName());
				if (itname == nameInFastaFile.end()) { nameInFastaFile.insert(seq.getName());  }
				else { m->mothurOut("[ERROR]: You already have a sequence named " + seq.getName() + " in your fasta file, sequence names must be unique, please correct."); m->mothurOutEndLine(); }

				itStrings = sequenceStrings.find(seq.getAligned());
				
				if (itStrings == sequenceStrings.end()) { //this is a new unique sequence
					//output to unique fasta file
					seq.printSequence(outFasta);
					
					if (namefile != "") {
						itNames = nameMap.find(seq.getName());
						
						if (itNames == nameMap.end()) { //namefile and fastafile do not match
							m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine();
						}else {
                            if (format == "name") { sequenceStrings[seq.getAligned()] = itNames->second;  nameFileOrder.push_back(seq.getAligned());
                            }else {  newCt.push_back(seq.getName(), util.getNumNames(itNames->second)); sequenceStrings[seq.getAligned()] = seq.getName();	nameFileOrder.push_back(seq.getAligned()); }
						}
					}else if (countfile != "") {
                        if (format == "name") {
                            int numSeqs = ct.getNumSeqs(seq.getName());
                            string expandedName = seq.getName()+"_0";
                            for (int i = 1; i < numSeqs; i++) {  expandedName += "," + seq.getName() + "_" + toString(i);  }
                            sequenceStrings[seq.getAligned()] = expandedName;  nameFileOrder.push_back(seq.getAligned());
                        }else {
                            ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table
                            sequenceStrings[seq.getAligned()] = seq.getName();	nameFileOrder.push_back(seq.getAligned());
                        }
                    }else {
                        if (format == "name") { sequenceStrings[seq.getAligned()] = seq.getName();	nameFileOrder.push_back(seq.getAligned()); }
                        else {  newCt.push_back(seq.getName()); sequenceStrings[seq.getAligned()] = seq.getName();	nameFileOrder.push_back(seq.getAligned()); }
                    }
				}else { //this is a dup
					if (namefile != "") {
						itNames = nameMap.find(seq.getName());
						
						if (itNames == nameMap.end()) { //namefile and fastafile do not match
							m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine();
						}else {
                            if (format == "name") { sequenceStrings[seq.getAligned()] += "," + itNames->second;  }
                            else {  int currentReps = newCt.getNumSeqs(itStrings->second);  newCt.setNumSeqs(itStrings->second, currentReps+(util.getNumNames(itNames->second)));  }
						}
                    }else if (countfile != "") {
                        if (format == "name") {
                            int numSeqs = ct.getNumSeqs(seq.getName());
                            string expandedName = seq.getName()+"_0";
                            for (int i = 1; i < numSeqs; i++) {  expandedName += "," + seq.getName() + "_" + toString(i);  }
                            sequenceStrings[seq.getAligned()] += "," + expandedName;
                        }else {
                            int num = ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table
                            if (num != 0) { //its in the table
                                ct.mergeCounts(itStrings->second, seq.getName()); //merges counts and saves in uniques name
                            }
                        }
                    }else {
                        if (format == "name") {  sequenceStrings[seq.getAligned()] += "," + seq.getName(); }
                        else {  int currentReps = newCt.getNumSeqs(itStrings->second); newCt.setNumSeqs(itStrings->second, currentReps+1);  }
                    }
				}
				count++;
			}
			
			util.gobble(in);
			
			if(count % 1000 == 0)	{ m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n");	}
		}
		
		if(count % 1000 != 0)	{ m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine();	}
		
		in.close();
		outFasta.close();
		
		if (m->getControl_pressed()) { util.mothurRemove(outFastaFile); return 0; }
        
		//print new names file
		ofstream outNames;
		if (format == "name") { util.openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile);   }
        else { util.openOutputFile(outCountFile, outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile);                }
        
        if ((countfile != "") && (format == "count")) { ct.printHeaders(outNames); }
        else if ((countfile == "") && (format == "count")) { newCt.printHeaders(outNames); }
		
		for (int i = 0; i < nameFileOrder.size(); i++) {
			if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outFastaFile); outNames.close(); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; }
			
			itStrings = sequenceStrings.find(nameFileOrder[i]);
			
			if (itStrings != sequenceStrings.end()) {
                if (format == "name") {
                    //get rep name
                    int pos = (itStrings->second).find_first_of(',');
                    
                    if (pos == string::npos) { // only reps itself
                        outNames << itStrings->second << '\t' << itStrings->second << endl;
                    }else {
                        outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
                    }
                }else {
                    if (countfile != "") {  ct.printSeq(outNames, itStrings->second);  }
                    else if (format == "count")  {  newCt.printSeq(outNames, itStrings->second);  }
                }
			}else{ m->mothurOut("[ERROR]: mismatch in namefile print."); m->mothurOutEndLine(); m->setControl_pressed(true); }
		}
		outNames.close();
		
		if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outFastaFile); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); }  return 0; }
		
		m->mothurOut("\nOutput File Names: \n"); 
		outputNames.push_back(outFastaFile);   outputTypes["fasta"].push_back(outFastaFile);  
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		//set fasta file as new current fastafile
		string currentName = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); }
		}
		
		itTypes = outputTypes.find("name");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); }
		}
        
        itTypes = outputTypes.find("count");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); }
		}
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "DeconvoluteCommand", "execute");
		exit(1);
	}
}
Example #17
0
//**********************************************************************************************************************
int SharedCommand::createSharedFromListGroup() {
	try {

        GroupMap* groupMap = NULL;
        CountTable* countTable = NULL;
        pickedGroups = false;
        if (groupfile != "") {
            groupMap = new GroupMap(groupfile);

            int groupError = groupMap->readMap();
            if (groupError == 1) { delete groupMap; return 0; }
            vector<string> allGroups = groupMap->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }else{
            countTable = new CountTable();
            countTable->readTable(countfile, true, false);
            vector<string> allGroups = countTable->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }
        int numGroups = Groups.size();
        if (m->getControl_pressed()) { return 0; }

        ofstream out;
        string filename = "";
        if (!pickedGroups) {
            string filename = listfile;
            if (outputDir == "") { outputDir += util.hasPath(filename); }

            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
            filename = getOutputFileName("shared",variables);
            outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
            util.openOutputFile(filename, out);
        }

        //set fileroot
        fileroot = outputDir + util.getRootName(util.getSimpleName(listfile));
        map<string, string> variables;
		variables["[filename]"] = fileroot;
        string errorOff = "no error";

        InputData input(listfile, "shared", Groups);
        SharedListVector* SharedList = input.getSharedListVector();
        string lastLabel = SharedList->getLabel();
        SharedRAbundVectors* lookup;

        if (m->getControl_pressed()) {
            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
            out.close(); if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }

        //sanity check
        vector<string> namesSeqs;
        int numGroupNames = 0;
        if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
        else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
        int error = ListGroupSameSeqs(namesSeqs, SharedList);

        if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {  //if the user has not specified any groups and their files don't match exit with error
            m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n");  m->setControl_pressed(true);

            out.close(); if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made

            //delete memory
            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
            return 0;
        }

        if (error == 1) { m->setControl_pressed(true); }

        //if user has specified groups make new groupfile for them
        if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file
            string groups = "";
            if (numGroups < 4) {
                for (int i = 0; i < numGroups-1; i++) {
                    groups += Groups[i] + ".";
                }
                groups+=Groups[numGroups-1];
            }else { groups = "merge"; }
            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(listfile));
            variables["[group]"] = groups;
            string newGroupFile = getOutputFileName("group",variables);
            outputTypes["group"].push_back(newGroupFile);
            outputNames.push_back(newGroupFile);
            ofstream outGroups;
            util.openOutputFile(newGroupFile, outGroups);

            vector<string> names = groupMap->getNamesSeqs();
            string groupName;
            for (int i = 0; i < names.size(); i++) {
                groupName = groupMap->getGroup(names[i]);
                if (isValidGroup(groupName, Groups)) {
                    outGroups << names[i] << '\t' << groupName << endl;
                }
            }
            outGroups.close();
        }

        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        bool printHeaders = true;
    
        while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            if (m->getControl_pressed()) {
                delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){

                lookup = SharedList->getSharedRAbundVector();

                m->mothurOut(lookup->getLabel()+"\n"); 

                if (m->getControl_pressed()) {
                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }

                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);

                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else {
                    printSharedData(lookup, out, printHeaders); //prints info to the .shared file
                }
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());
            }

            if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = SharedList->getLabel();

                delete SharedList;
                SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

                lookup = SharedList->getSharedRAbundVector();
                m->mothurOut(lookup->getLabel()+"\n"); 

                if (m->getControl_pressed()) {
                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }

                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);

                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else {
                    printSharedData(lookup, out, printHeaders); //prints info to the .shared file
                }
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());

                //restore real lastlabel to save below
                SharedList->setLabel(saveLabel);
            }


            lastLabel = SharedList->getLabel();

            delete SharedList;
            SharedList = input.getSharedListVector(); //get new list vector to process
        }
        
        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            if (processedLabels.count(lastLabel) != 1) {
                needToRun = true;
            }
        }
        
        //run last label if you need to
        if (needToRun )  {
            if (SharedList != NULL) {	delete SharedList;	}
            SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

            lookup = SharedList->getSharedRAbundVector();
            m->mothurOut(lookup->getLabel()+"\n"); 

            if (m->getControl_pressed()) {
                if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            //if picked groups must split the shared file by label
            if (pickedGroups) {
                string filename = listfile;
                if (outputDir == "") { outputDir += util.hasPath(filename); }

                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                variables["[distance]"] = lookup->getLabel();
                filename = getOutputFileName("shared",variables);
                outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                ofstream out2;
                util.openOutputFile(filename, out2);

                lookup->eliminateZeroOTUS();
                printSharedData(lookup, out2, printHeaders);
                out2.close();
            }else {
                printSharedData(lookup, out, printHeaders); //prints info to the .shared file
            }
            delete lookup;
            delete SharedList;
        }
        
        if (!pickedGroups) { out.close(); }

        if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }

        if (m->getControl_pressed()) {
            if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }
        
        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
		exit(1);
	}
}
Example #18
0
//**********************************************************************************************************************
int SplitGroupCommand::runCount(){
	try {
        
        CountTable ct;
        ct.readTable(countfile, true, false);
        if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
        
        if (m->control_pressed) { return 0; }
        
        vector<string> namesGroups = ct.getNamesOfGroups();
        SharedUtil util;  util.setGroups(Groups, namesGroups); 
        
        //fill filehandles with neccessary ofstreams
        map<string, string> ffiles; //group -> filename
        map<string, string> cfiles; //group -> filename
        for (int i=0; i<Groups.size(); i++) {
            ofstream ftemp, ctemp;
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
            variables["[group]"] = Groups[i];
            string newFasta = getOutputFileName("fasta",variables);
            outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
            ffiles[Groups[i]] = newFasta;
            m->openOutputFile(newFasta, ftemp); ftemp.close();
            
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
            string newCount = getOutputFileName("count",variables);
            outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
            cfiles[Groups[i]] = newCount;
            m->openOutputFile(newCount, ctemp);
            ctemp << "Representative_Sequence\ttotal\t" << Groups[i] << endl; ctemp.close();
        }
        
        ifstream in; 
        m->openInputFile(fastafile, in);
        
        while (!in.eof()) {
            Sequence seq(in); m->gobble(in);
            
            if (m->control_pressed) { break; }
            if (seq.getName() != "") {
                vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
                for (int i = 0; i < thisSeqsGroups.size(); i++) {
                    if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
                        ofstream outf, outc;
                        m->openOutputFileAppend(ffiles[thisSeqsGroups[i]], outf);
                        seq.printSequence(outf); outf.close();
                        int numSeqs = ct.getGroupCount(seq.getName(), thisSeqsGroups[i]);
                        m->openOutputFileAppend(cfiles[thisSeqsGroups[i]], outc);
                        outc << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl; outc.close();
                    }
                }
            }
        }
        in.close();
        
        return 0;

    }
	catch(exception& e) {
		m->errorOut(e, "SplitGroupCommand", "runCount");
		exit(1);
	}
}
Example #19
0
//**********************************************************************************************************************
SharedCommand::SharedCommand(string option)  {
	try {
        abort = false; calledHelp = false; pickedGroups=false;
		allLines = 1;

		//allow user to run help
		if(option == "help") { help(); abort = true; calledHelp = true; }
		else if(option == "citation") { citation(); abort = true; calledHelp = true;}

		else {

			 vector<string> myArray = setParameters();

			 OptionParser parser(option);
			 map<string, string> parameters = parser.getParameters();

			 ValidParameters validParameter;
			 map<string, string>::iterator it;

			 //check to make sure all parameters are valid for command
			 for (it = parameters.begin(); it != parameters.end(); it++) {
				 if (!validParameter.isValidParameter(it->first, myArray, it->second)) {  abort = true;  }
			 }

			 //if the user changes the input directory command factory will send this info to us in the output parameter
			string inputDir = validParameter.valid(parameters, "inputdir");
			 if (inputDir == "not found"){	inputDir = "";		}
			 else {
				 string path;
				 it = parameters.find("list");
				 //user has given a template file
				 if(it != parameters.end()){
					 path = util.hasPath(it->second);
					 //if the user has not given a path then, add inputdir. else leave path alone.
					 if (path == "") {	parameters["list"] = inputDir + it->second;		}
				 }

				 it = parameters.find("group");
				 //user has given a template file
				 if(it != parameters.end()){
					 path = util.hasPath(it->second);
					 //if the user has not given a path then, add inputdir. else leave path alone.
					 if (path == "") {	parameters["group"] = inputDir + it->second;		}
				 }

				 it = parameters.find("count");
				 //user has given a template file
				 if(it != parameters.end()){
					 path = util.hasPath(it->second);
					 //if the user has not given a path then, add inputdir. else leave path alone.
					 if (path == "") {	parameters["count"] = inputDir + it->second;		}
				 }

                 it = parameters.find("biom");
				 //user has given a template file
				 if(it != parameters.end()){
					 path = util.hasPath(it->second);
					 //if the user has not given a path then, add inputdir. else leave path alone.
					 if (path == "") {	parameters["biom"] = inputDir + it->second;		}
				 }
			 }

             vector<string> tempOutNames;
             outputTypes["shared"] = tempOutNames;
             outputTypes["group"] = tempOutNames;
             outputTypes["map"] = tempOutNames;

			 //if the user changes the output directory command factory will send this info to us in the output parameter
			 outputDir = validParameter.valid(parameters, "outputdir");		if (outputDir == "not found"){	outputDir = "";	}

			 //check for required parameters
			 listfile = validParameter.validFile(parameters, "list");
			 if (listfile == "not open") { listfile = ""; abort = true; }
			 else if (listfile == "not found") { listfile = "";  }
			 else { current->setListFile(listfile); }

             biomfile = validParameter.validFile(parameters, "biom");
             if (biomfile == "not open") { biomfile = ""; abort = true; }
             else if (biomfile == "not found") { biomfile = "";  }
             else { current->setBiomFile(biomfile); }

			 ordergroupfile = validParameter.validFile(parameters, "ordergroup");
			 if (ordergroupfile == "not open") { abort = true; }
			 else if (ordergroupfile == "not found") { ordergroupfile = ""; }

			 groupfile = validParameter.validFile(parameters, "group");
			 if (groupfile == "not open") { groupfile = ""; abort = true; }
			 else if (groupfile == "not found") { groupfile = ""; }
			 else {  current->setGroupFile(groupfile); }

             countfile = validParameter.validFile(parameters, "count");
             if (countfile == "not open") { countfile = ""; abort = true; }
             else if (countfile == "not found") { countfile = ""; }
             else {
                 current->setCountFile(countfile);
                 CountTable temp;
                 if (!temp.testGroups(countfile)) {
                     m->mothurOut("\n[WARNING]: Your count file does not have group info, all reads will be assigned to mothurGroup.\n");
                     
                     temp.readTable(countfile, false, false); //dont read groups
                     map<string, int> seqs = temp.getNameMap();
                     
                     CountTable newCountTable;
                     newCountTable.addGroup("mothurGroup");
                     
                     for (map<string, int>::iterator it = seqs.begin(); it != seqs.end(); it++) {
                         vector<int> counts; counts.push_back(it->second);
                         newCountTable.push_back(it->first, counts);
                     }
                     
                     string newCountfileName = util.getRootName(countfile) + "mothurGroup" + util.getExtension(countfile);
                     newCountTable.printTable(newCountfileName);
                     
                     current->setCountFile(newCountfileName);
                     countfile = newCountfileName;
                     outputNames.push_back(newCountfileName);
                 }
             }

            if ((biomfile == "") && (listfile == "") && (countfile == "")) { //you must provide at least one of the following
				//is there are current file available for either of these?
				//give priority to list, then biom, then count
				listfile = current->getListFile();
				if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter.\n");  }
				else {
					biomfile = current->getBiomFile();
                    if (biomfile != "") {  m->mothurOut("Using " + biomfile + " as input file for the biom parameter.\n"); }
					else {
                        countfile = current->getCountFile();
                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter.\n");  }
                        else {
                            m->mothurOut("[ERROR]: No valid current files. You must provide a list or biom or count file before you can use the make.shared command.\n");  abort = true;
                        }

					}
				}
			}
			else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom.\n"); abort = true; }

			if (listfile != "") {
				if ((groupfile == "") && (countfile == "")) {
					groupfile = current->getGroupFile();
					if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n");  }
					else {
						countfile = current->getCountFile();
                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); }
                        else { m->mothurOut("[ERROR]: You need to provide a groupfile or countfile if you are going to use the list format.\n");  abort = true; }
					}
				}
			}


			 string groups = validParameter.valid(parameters, "groups");
			 if (groups == "not found") { groups = ""; }
			 else {
                 pickedGroups=true;
				 util.splitAtDash(groups, Groups);
                if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } }
			 }

			 //check for optional parameter and set defaults
			 // ...at some point should added some additional type checking...
			 string label = validParameter.valid(parameters, "label");
			 if (label == "not found") { label = ""; }
			 else {
				 if(label != "all") {  util.splitAtDash(label, labels);  allLines = 0;  }
				 else { allLines = 1;  }
			 }
            
            if ((listfile == "") && (biomfile == "") && (countfile != "")) { //building a shared file from a count file, require label
                if (labels.size() == 0) {
                    m->mothurOut("[ERROR]: You must provide a label when converting a count file to a shared file, please correct.\n");  abort = true;
                }
            }
		}

	}
	catch(exception& e) {
		m->errorOut(e, "SharedCommand", "SharedCommand");
		exit(1);
	}
}
//***************************************************************************************************************
int SummaryQualCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		int start = time(NULL);
		int numSeqs = 0;
		
		vector<int> position;
		vector<int> averageQ;
		vector< vector<int> > scores;
				
		if (m->control_pressed) { return 0; }
		
		if (namefile != "") { nameMap = m->readNames(namefile); }
		else if (countfile != "") {
            CountTable ct;
            ct.readTable(countfile, false, false);
            nameMap = ct.getNameMap();
        }
        
		vector<unsigned long long> positions; 
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		positions = m->divideFile(qualfile, processors);
		for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(linePair(positions[i], positions[(i+1)]));	}
#else	
		if (processors == 1) {
			lines.push_back(linePair(0, 1000)); 
		}else {
			positions = m->setFilePosFasta(qualfile, numSeqs); 
            if (numSeqs < processors) { processors = numSeqs; }
			
			//figure out how many sequences you have to process
			int numSeqsPerProcessor = numSeqs / processors;
			for (int i = 0; i < processors; i++) {
				int startIndex =  i * numSeqsPerProcessor;
				if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
				lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
			}
		}
#endif
		
		
		if(processors == 1){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]);  }
		else{  numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile);  }
		
		if (m->control_pressed) {  return 0; }
		
		//print summary file
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qualfile));
		string summaryFile = getOutputFileName("summary",variables);
		printQual(summaryFile, position, averageQ, scores);
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		//output results to screen
		cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
		m->mothurOutEndLine();
		m->mothurOut("Position\tNumSeqs\tAverageQ"); m->mothurOutEndLine();
		for (int i = 0; i < position.size(); i+=100) {
			float average = averageQ[i] / (float) position[i];
			cout << i << '\t' << position[i] << '\t' << average;
			m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)); m->mothurOutEndLine();
		}
		
		m->mothurOutEndLine();
		m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(summaryFile); m->mothurOutEndLine();	outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
		m->mothurOutEndLine();
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SummaryQualCommand", "execute");
		exit(1);
	}
}
Example #21
0
int ClassifySeqsCommand::execute(){
	try {
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
        
        string outputMethodTag = method;
		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	}
		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());				}
        else if(method == "zap"){	
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
        }
		else {
			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
			m->mothurOutEndLine();
			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	
		}
		
		if (m->control_pressed) { delete classify; return 0; }
				
		for (int s = 0; s < fastaFileNames.size(); s++) {
		
			m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
			
			string baseTName = m->getSimpleName(taxonomyFileName);
			
            //set rippedTaxName to 
			string RippedTaxName = "";
            bool foundDot = false;
            for (int i = baseTName.length()-1; i >= 0; i--) {
                if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
                else if (foundDot && (baseTName[i] == '.')) {  break; }
                else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
            }
            //if (RippedTaxName != "") { RippedTaxName +=  "."; }   
          
			if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
            variables["[tag]"] = RippedTaxName;
            variables["[tag2]"] = outputMethodTag;
			string newTaxonomyFile = getOutputFileName("taxonomy", variables);
			string newaccnosFile = getOutputFileName("accnos", variables);
			string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
			string taxSummary = getOutputFileName("taxsummary", variables);
			
			if ((method == "knn") && (search == "distance")) { 
				string DistName = getOutputFileName("matchdist", variables);
				classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
			}
			
			outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
			outputNames.push_back(taxSummary);	outputTypes["taxsummary"].push_back(taxSummary);
			
			int start = time(NULL);
			int numFastaSeqs = 0;
			for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
		
			vector<unsigned long long> positions; 
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			positions = m->divideFile(fastaFileNames[s], processors);
			for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
#else
			if (processors == 1) {
				lines.push_back(new linePair(0, 1000));
			}else {
				positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); 
                if (numFastaSeqs < processors) { processors = numFastaSeqs; }
				
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			}
#endif
			if(processors == 1){
				numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
			}else{
				numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); 
			}
			
			if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
                outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
            }else { m->mothurRemove(newaccnosFile); }

            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
            start = time(NULL);
            
            //read namefile
            if(namefile != "") {
                
                m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
                nameMap.clear(); //remove old names
                m->readNames(namefileNames[s], nameMap);
                m->mothurOut("  Done."); m->mothurOutEndLine();
            }
            
            //output taxonomy with the unclassified bins added
            ifstream inTax;
            m->openInputFile(newTaxonomyFile, inTax);
            
            ofstream outTax;
            string unclass = newTaxonomyFile + ".unclass.temp";
            m->openOutputFile(unclass, outTax);
            
            //get maxLevel from phylotree so you know how many 'unclassified's to add
            int maxLevel = classify->getMaxLevel();
            
            //read taxfile - this reading and rewriting is done to preserve the confidence scores.
            string name, taxon;
            string group = "";
            GroupMap* groupMap = NULL;
            CountTable* ct = NULL;
            PhyloSummary* taxaSum;
            
            if (hasCount) {
                ct = new CountTable();
                ct->readTable(countfileNames[s], true, false);
                taxaSum = new PhyloSummary(ct, relabund, printlevel);
            }else {
                if (groupfile != "") {  group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
                taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
            }
            
            while (!inTax.eof()) {
                if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
                
                inTax >> name >> taxon; m->gobble(inTax);
                
                string newTax = m->addUnclassifieds(taxon, maxLevel, probs);
                
                outTax << name << '\t' << newTax << endl;
                
                if (namefile != "") {
                    itNames = nameMap.find(name);
                    
                    if (itNames == nameMap.end()) {
                        m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
                    }else{
                        for (int i = 0; i < itNames->second.size(); i++) {
                            taxaSum->addSeqToTree(itNames->second[i], newTax);  //add it as many times as there are identical seqs
                        }
                        itNames->second.clear();
                        nameMap.erase(itNames->first);
                    }
                }else {
                    taxaSum->addSeqToTree(name, newTax);
                }
            }
            inTax.close();
            outTax.close();
            
            m->mothurRemove(newTaxonomyFile);
            rename(unclass.c_str(), newTaxonomyFile.c_str());
            
            if (m->control_pressed) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
            
            //print summary file
            ofstream outTaxTree;
            m->openOutputFile(taxSummary, outTaxTree);
            taxaSum->print(outTaxTree, output);
            outTaxTree.close();
            
            if (ct != NULL) { delete ct; }
            if (groupMap != NULL) { delete groupMap; } delete taxaSum;
            m->mothurRemove(tempTaxonomyFile);
            
            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
			
		}
        delete classify;
        
        m->mothurOutEndLine();
        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
        m->mothurOutEndLine();
		
		//set taxonomy file as new current taxonomyfile
		string current = "";
		itTypes = outputTypes.find("taxonomy");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
		}
		
		current = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
		}
		
		
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifySeqsCommand", "execute");
		exit(1);
	}
}