Exemplo n.º 1
0
/**
 * Runs the chimera.perseus check on every fasta file in fastaFileNames.
 *
 * For each fasta file this:
 *   - picks the group file at the same index when group files were given,
 *   - picks the names file at the same index, or builds one with getNamesFile(),
 *   - checks the sequences either per group (driverGroups or
 *     createProcessesGroups, followed by deconvoluteResults) or as one set
 *     (readFiles + driver),
 *   - records the "perseus.chimera" and "perseus.accnos" files in
 *     outputNames / outputTypes.
 * Afterwards the first accnos file produced is passed to m->setAccnosFile()
 * and the list of output files is printed.
 *
 * Whenever m->control_pressed is found set, every file already listed in
 * outputNames is removed and the function returns 0.
 * NOTE(review): the files of the fasta file currently being processed are only
 * added to outputNames at the end of its iteration, so they are not part of
 * that cleanup - confirm whether that is intended.
 *
 * @return 0 on success, when help was requested, or after an interrupt;
 *         2 when abort is set and help was not requested.
 *         Exceptions are reported through m->errorOut() and end the process
 *         with exit status 1.
 */
int ChimeraPerseusCommand::execute(){
	try{
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//An empty outputDir means the outputs should follow each input file's own path.
		//Decide this once, up front: testing outputDir inside the loop would make the
		//first file's path stick and send every later file's output to that directory.
		const bool followInputPath = (outputDir == "");
		
		//process each file
		for (int s = 0; s < fastaFileNames.size(); s++) {
			
			m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
			
			//kept as time_t so the timestamp is not narrowed to int
			time_t start = time(NULL);
			
			//if user entered a file with a path then preserve it, separately for every file
			if (followInputPath) { outputDir = m->hasPath(fastaFileNames[s]);  }
			string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera";
			string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "perseus.accnos";
			
			//you provided a groupfile
			string groupFile = "";
			if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
			
			string nameFile = "";
			if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
				nameFile = nameFileNames[s];
			}else { nameFile = getNamesFile(fastaFileNames[s]); }
			
			if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} return 0;	}
			
			int numSeqs = 0;
			int numChimeras = 0;
			
			if (groupFile != "") {
				//Parse sequences by group
				SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
				vector<string> groups = parser.getNamesOfGroups();
				
				if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  return 0; }
				
				//clears files
				ofstream out, out1;
				m->openOutputFile(outputFileName, out); out.close(); 
				m->openOutputFile(accnosFileName, out1); out1.close();
				
				//single processor runs all groups here, otherwise the groups are handed to createProcessesGroups
				if(processors == 1)	{	numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups);	}
				else				{	numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);			}
				
				if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  return 0;	}
				
				//the chimera count for the grouped run comes from deconvoluteResults
				numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName);
				
				m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
				
				if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  return 0;	}
				
			}else{
				if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
				
				//read sequences and store sorted by frequency
				vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
				
				if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} return 0; }
				
				//numChimeras is handed to driver alongside the returned sequence count
				numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
			}
			
			if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} return 0; }
			
			m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");	m->mothurOutEndLine();
			outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
			outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
		}
		
		//set accnos file as new current accnosfile
		string current = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}	
		m->mothurOutEndLine();
		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraPerseusCommand", "execute");
		exit(1);
	}
}
Exemplo n.º 2
0
int PreClusterCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		int start = time(NULL);
        
        if(align == "gotoh")			{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 1000);	}
        else if(align == "needleman")	{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000);			}
        else if(align == "blast")		{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
        else if(align == "noalign")		{	alignment = new NoAlign();													}
        else {
            m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
            m->mothurOutEndLine();
            alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000);
        }
		
		string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
        map<string, string> variables; 
        variables["[filename]"] = fileroot;
		string newNamesFile = getOutputFileName("name",variables);
        string newCountFile = getOutputFileName("count",variables);
		string newMapFile = getOutputFileName("map",variables); //add group name if by group
        variables["[extension]"] = m->getExtension(fastafile);
		string newFastaFile = getOutputFileName("fasta", variables);
		outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile);
		if (countfile == "") { outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); }
		else { outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); }
		
		if (bygroup) {
			//clear out old files
			ofstream outFasta; m->openOutputFile(newFastaFile, outFasta); outFasta.close();
			ofstream outNames; m->openOutputFile(newNamesFile, outNames);  outNames.close();
			newMapFile = fileroot + "precluster.";
			
			//parse fasta and name file by group
            vector<string> groups;
			if (countfile != "") {
                cparser = new SequenceCountParser(countfile, fastafile);
                groups = cparser->getNamesOfGroups();
            }else {
                if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile);	}
                else				{ parser = new SequenceParser(groupfile, fastafile);			}
                groups = parser->getNamesOfGroups();
			}
            
			if(processors == 1)	{	driverGroups(newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups);	}
			else				{	createProcessesGroups(newFastaFile, newNamesFile, newMapFile, groups);			}
			
			if (countfile != "") { 
                mergeGroupCounts(newCountFile, newNamesFile, newFastaFile);
                delete cparser; 
            }else {  
                delete parser; 
                //run unique.seqs for deconvolute results
                string inputString = "fasta=" + newFastaFile;
                if (namefile != "") { inputString += ", name=" + newNamesFile; }
                m->mothurOutEndLine(); 
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
                m->mothurCalling = true;
                
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
                m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                m->renameFile(filenames["fasta"][0], newFastaFile);
                m->renameFile(filenames["name"][0], newNamesFile); 
			}
            if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	}	 delete alignment; return 0; }
			m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); 
				
		}else {
            if (processors != 1) { m->mothurOut("When using running without group information mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
			if (namefile != "") { readNameFile(); }
		
			//reads fasta file and return number of seqs
			int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active
		
			if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} delete alignment; return 0; }
	
			if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); delete alignment; return 0;  }
			if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); delete alignment; return 0;  }
			
			int count = process(newMapFile);
			outputNames.push_back(newMapFile); outputTypes["map"].push_back(newMapFile);
			
			if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} delete alignment; return 0; }
			
			m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
			m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
			if (countfile != "") { newNamesFile = newCountFile; }
            printData(newFastaFile, newNamesFile, "");
            			
			m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
		}
				
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} delete alignment; return 0; }
        
        delete alignment;
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}		
		m->mothurOutEndLine();
		
		//set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
		}
		
		itTypes = outputTypes.find("name");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
		}
        
        itTypes = outputTypes.find("count");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
		}
		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "PreClusterCommand", "execute");
		exit(1);
	}
}