Exemplo n.º 1
0
int main(int argc, char** argv) {
	if(argc == 5) {
		const std::string inPath  = argv[1];
		const std::string outPath = argv[2];
		const int maxLength     = boost::lexical_cast<int>(argv[3]);
		const int overlapLength = boost::lexical_cast<int>(argv[4]);
		if(maxLength <= overlapLength) {
			std::cout << " arg3(after splitted length) must be bigger than arg4(overlap length)." << std::endl;
			abort();
		}
		readFASTA(inPath, outPath, maxLength, overlapLength);
	} else {
		std::cout
			<< "This program splits large reference genome into many small parts." << std::endl
			<< std::endl
			<< " <------------------------ large reference genome ------------------------>" << std::endl
			<< " <-------- small part -------->" << std::endl
			<< "                          <-------- small part -------->" << std::endl
			<< "                                                   <----- small part ----->" << std::endl
			<< std::endl
			<< "usage:" << std::endl
			<< " arg1 -> input reference fasta file path." << std::endl
			<< " arg2 -> output file path." << std::endl
			<< " arg3 -> after splitted length." << std::endl
			<< " arg4 -> overlap length between parts." << std::endl;
	}
	return 0;
}
Exemplo n.º 2
0
/**
 * Runs the pre.cluster command.
 *
 * Steps, in order:
 *   1. Bail out early if the command was flagged as aborted.
 *   2. Allocate the `alignment` object selected by the `align` option
 *      (falls back to needleman for an unrecognised value).
 *   3. Derive the output file names and register the fasta file plus either
 *      the name file (no count file given) or the count file.
 *   4. If `bygroup` is set: process the sequences per group (single driver or
 *      multiple processes), then either merge the group counts or run
 *      unique.seqs on the result and rename its outputs into place.
 *      Otherwise: force a single processor, read the name/fasta input,
 *      validate it, run process() and print the result.
 *   5. List the output files and record the fasta/name/count outputs as the
 *      current files.
 *
 * `alignment` is deleted on every return path that follows its allocation.
 *
 * @return 0 on completion, after help was requested, after an interrupt
 *         (m->control_pressed) or after a reported input error;
 *         2 when the command was aborted without help having been called.
 *         Any std::exception is reported through m->errorOut and the
 *         process exits with status 1.
 */
int PreClusterCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		// keep the full time_t; storing it in an int narrows the value
		time_t start = time(NULL);
		
		if(align == "gotoh")			{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 1000);	}
		else if(align == "needleman")	{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000);			}
		else if(align == "blast")		{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
		else if(align == "noalign")		{	alignment = new NoAlign();													}
		else {
			m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
			m->mothurOutEndLine();
			alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000);
		}
		
		// build the output file names from the fasta file's root name
		string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
		map<string, string> variables; 
		variables["[filename]"] = fileroot;
		string newNamesFile = getOutputFileName("name",variables);
		string newCountFile = getOutputFileName("count",variables);
		string newMapFile = getOutputFileName("map",variables); //add group name if by group
		variables["[extension]"] = m->getExtension(fastafile);
		string newFastaFile = getOutputFileName("fasta", variables);
		outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile);
		if (countfile == "") { outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); }
		else { outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); }
		
		if (bygroup) {
			//clear out old files
			ofstream outFasta; m->openOutputFile(newFastaFile, outFasta); outFasta.close();
			ofstream outNames; m->openOutputFile(newNamesFile, outNames);  outNames.close();
			// in group mode the map file name is only a prefix
			newMapFile = fileroot + "precluster.";
			
			//parse fasta and name file by group
			vector<string> groups;
			if (countfile != "") {
				cparser = new SequenceCountParser(countfile, fastafile);
				groups = cparser->getNamesOfGroups();
			}else {
				if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile);	}
				else				{ parser = new SequenceParser(groupfile, fastafile);			}
				groups = parser->getNamesOfGroups();
			}
			
			if(processors == 1)	{	driverGroups(newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups);	}
			else				{	createProcessesGroups(newFastaFile, newNamesFile, newMapFile, groups);			}
			
			if (countfile != "") { 
				mergeGroupCounts(newCountFile, newNamesFile, newFastaFile);
				delete cparser; 
			}else {  
				delete parser; 
				//run unique.seqs for deconvolute results
				string inputString = "fasta=" + newFastaFile;
				if (namefile != "") { inputString += ", name=" + newNamesFile; }
				m->mothurOutEndLine(); 
				m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
				m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
				m->mothurCalling = true;
				
				Command* uniqueCommand = new DeconvoluteCommand(inputString);
				uniqueCommand->execute();
				
				map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
				
				delete uniqueCommand;
				m->mothurCalling = false;
				m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
				
				// move the unique.seqs results over this command's output files
				m->renameFile(filenames["fasta"][0], newFastaFile);
				m->renameFile(filenames["name"][0], newNamesFile); 
			}
			// interrupted: remove everything written so far
			if (m->control_pressed) { for (size_t i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	}	 delete alignment; return 0; }
			m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); 
				
		}else {
			if (processors != 1) { m->mothurOut("When running without group information mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
			if (namefile != "") { readNameFile(); }
		
			//reads fasta file and return number of seqs
			int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active
		
			if (m->control_pressed) { for (size_t i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} delete alignment; return 0; }
	
			if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); delete alignment; return 0;  }
			if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); delete alignment; return 0;  }
			
			int count = process(newMapFile);
			outputNames.push_back(newMapFile); outputTypes["map"].push_back(newMapFile);
			
			if (m->control_pressed) { for (size_t i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} delete alignment; return 0; }
			
			m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
			m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
			// with a count file, printData is handed the count file name instead of the name file
			if (countfile != "") { newNamesFile = newCountFile; }
			printData(newFastaFile, newNamesFile, "");
			
			m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
		}
				
		if (m->control_pressed) { for (size_t i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} delete alignment; return 0; }
		
		delete alignment;
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (size_t i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}		
		m->mothurOutEndLine();
		
		//set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
		}
		
		//set name file as new current namefile
		itTypes = outputTypes.find("name");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
		}
		
		//set count file as new current count table
		itTypes = outputTypes.find("count");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
		}
		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "PreClusterCommand", "execute");
		exit(1);
	}
}