Ejemplo n.º 1
0
int ClusterCommand::execute(){
	try {
	
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//phylip file given and cutoff not given - use cluster.classic because it uses less memory and is faster
		if ((format == "phylip") && (cutoff > 10.0)) {
			m->mothurOutEndLine(); m->mothurOut("You are using a phylip file and no cutoff.  I will run cluster.classic to save memory and time."); m->mothurOutEndLine();
			
			//run unique.seqs for deconvolute results
			string inputString = "phylip=" + distfile;
			if (namefile != "") { inputString += ", name=" + namefile; }
			inputString += ", precision=" + toString(precision);
			inputString += ", method=" + method;
			if (hard)	{ inputString += ", hard=T";	}
			else		{ inputString += ", hard=F";	}
			if (sim)	{ inputString += ", sim=T";		}
			else		{ inputString += ", sim=F";		}

			
			m->mothurOutEndLine(); 
			m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine(); 
			m->mothurOut("Running command: cluster.classic(" + inputString + ")"); m->mothurOutEndLine(); 
			
			Command* clusterClassicCommand = new ClusterDoturCommand(inputString);
			clusterClassicCommand->execute();
			delete clusterClassicCommand;
			
			m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine();  

			return 0;
		}
		
		ReadMatrix* read;
		if (format == "column") { read = new ReadColumnMatrix(columnfile, sim); }	//sim indicates whether its a similarity matrix
		else if (format == "phylip") { read = new ReadPhylipMatrix(phylipfile, sim); }
		
		read->setCutoff(cutoff);
		
		NameAssignment* nameMap = NULL;
		if(namefile != ""){	
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
		}
		
		read->read(nameMap);
		list = read->getListVector();
		matrix = read->getMatrix();
		rabund = new RAbundVector(list->getRAbundVector());
		delete read;
		
		if (m->control_pressed) { //clean up
			delete list; delete matrix; delete rabund;
			sabundFile.close();rabundFile.close();listFile.close();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} outputTypes.clear();
			return 0;
		}
		
		//create cluster
		if (method == "furthest")	{	cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
		else if(method == "nearest"){	cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
		else if(method == "average"){	cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);	}
		else if(method == "weighted"){	cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method);	}
		tag = cluster->getTag();
		
		if (outputDir == "") { outputDir += m->hasPath(distfile); }
		fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
		
		m->openOutputFile(fileroot+ tag + ".sabund",	sabundFile);
		m->openOutputFile(fileroot+ tag + ".rabund",	rabundFile);
		m->openOutputFile(fileroot+ tag + ".list",		listFile);
		
		outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
		outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
		outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
		
		
		time_t estart = time(NULL);
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		oldRAbund = *rabund;
		oldList = *list;

		print_start = true;
		start = time(NULL);
		loops = 0;
		double saveCutoff = cutoff;
		
		while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
		
			if (m->control_pressed) { //clean up
				delete list; delete matrix; delete rabund; delete cluster;
				sabundFile.close();rabundFile.close();listFile.close();
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} outputTypes.clear();
				return 0;
			}
		
			if (print_start && m->isTrue(timing)) {
				m->mothurOut("Clustering (" + tag + ") dist " + toString(matrix->getSmallDist()) + "/" 
					+ toString(m->roundDist(matrix->getSmallDist(), precision)) 
					+ "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
				cout.flush();
				print_start = false;
			}

			loops++;

			cluster->update(cutoff);
	
			float dist = matrix->getSmallDist();
			float rndDist;
			if (hard) {
				rndDist = m->ceilDist(dist, precision); 
			}else{
				rndDist = m->roundDist(dist, precision); 
			}

			if(previousDist <= 0.0000 && dist != previousDist){
				printData("unique");
			}
			else if(rndDist != rndPreviousDist){
				printData(toString(rndPreviousDist,  length-1));
			}
		
			previousDist = dist;
			rndPreviousDist = rndDist;
			oldRAbund = *rabund;
			oldList = *list;
		}

		if (print_start && m->isTrue(timing)) {
			m->mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) 
					 + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
			cout.flush();
	 		print_start = false;
		}
		
		if(previousDist <= 0.0000){
			printData("unique");
		}
		else if(rndPreviousDist<cutoff){
			printData(toString(rndPreviousDist, length-1));
		}
		
		delete matrix;
		delete list;
		delete rabund;
		delete cluster;
		
		sabundFile.close();
		rabundFile.close();
		listFile.close();
	
		if (saveCutoff != cutoff) { 
			if (hard)	{  saveCutoff = m->ceilDist(saveCutoff, precision);	}
			else		{	saveCutoff = m->roundDist(saveCutoff, precision);  }

			m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
		}
		
		//set list file as new current listfile
		string current = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
		}
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
		}
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		
		//if (m->isTrue(timing)) {
			m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
		//}
		
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClusterCommand", "execute");
		exit(1);
	}
}
Ejemplo n.º 2
0
int HClusterCommand::execute(){
	try {
	
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		NameAssignment* nameMap = NULL;
		if(namefile != ""){	
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
		}		
		
		time_t estart = time(NULL);
		
		if (!sorted) {
			read = new ReadCluster(distfile, cutoff, outputDir, true); 	
			read->setFormat(format);
			read->read(nameMap);
			
			if (m->control_pressed) {  
				delete read; 
				sabundFile.close();
				rabundFile.close();
				listFile.close();
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
				return 0;  
			}
			
			distfile = read->getOutputFile();
		
			list = read->getListVector();
			delete read;
		}else {
			list = new ListVector(nameMap->getListVector());
		}
		
		if (m->control_pressed) {  
			sabundFile.close();
			rabundFile.close();
			listFile.close();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
			return 0;  
		}

		m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort. "); m->mothurOutEndLine();
		estart = time(NULL);
	
		//list vector made by read contains all sequence names
		if(list != NULL){
			rabund = new RAbundVector(list->getRAbundVector());
		}else{
			m->mothurOut("Error: no list vector!"); m->mothurOutEndLine(); return 0;
		}
        
        list->printHeaders(listFile);
        
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		oldRAbund = *rabund;
		oldList = *list;
		
		print_start = true;
		start = time(NULL);
				
		cluster = new HCluster(rabund, list, method, distfile, nameMap, cutoff);
		vector<seqDist> seqs; seqs.resize(1); // to start loop
		
		if (m->control_pressed) {  
				delete cluster;
				sabundFile.close();
				rabundFile.close();
				listFile.close();
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
				return 0;  
		}

		float saveCutoff = cutoff;
		
		while (seqs.size() != 0){
		
			seqs = cluster->getSeqs();
			
			//to account for cutoff change in average neighbor
			if (seqs.size() != 0) {
				if (seqs[0].dist > cutoff) { break; }
			}
			
			if (m->control_pressed) {  
				delete cluster;
				sabundFile.close();
				rabundFile.close();
				listFile.close();
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
				return 0;  
			}

			for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
				
				if (seqs[i].seq1 != seqs[i].seq2) {
					cutoff = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
					
					if (m->control_pressed) {  
						delete cluster;
						sabundFile.close();
						rabundFile.close();
						listFile.close();
						for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
						return 0;  
					}

			
					float rndDist;
					if (hard) {
						rndDist = m->ceilDist(seqs[i].dist, precision); 
					}else{
						rndDist = m->roundDist(seqs[i].dist, precision); 
					}

					
					if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
						printData("unique");
					}
					else if((rndDist != rndPreviousDist)){
						printData(toString(rndPreviousDist,  length-1));
					}
				
					previousDist = seqs[i].dist;
					rndPreviousDist = rndDist;
					oldRAbund = *rabund;
					oldList = *list;
				}
			}
		}

		if (m->control_pressed) {  
			delete cluster;
			sabundFile.close();
			rabundFile.close();
			listFile.close();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
			return 0;  
		}
					
		if(previousDist <= 0.0000){
			printData("unique");
		}
		else if(rndPreviousDist<cutoff){
			printData(toString(rndPreviousDist, length-1));
		}
				
		sabundFile.close();
		rabundFile.close();
		listFile.close();
		delete cluster;
		
		if (m->control_pressed) {  
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } outputTypes.clear();
			return 0;  
		}
		
		
		if (saveCutoff != cutoff) { 
			if (hard)	{  saveCutoff = m->ceilDist(saveCutoff, precision);	}
			else		{	saveCutoff = m->roundDist(saveCutoff, precision);  }
			
			m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
		}
		
		//set list file as new current listfile
		string current = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
		}
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
		}
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
		}
		
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();
		
		m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster. "); m->mothurOutEndLine();
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "HClusterCommand", "execute");
		exit(1);
	}
}
Ejemplo n.º 3
0
int ClusterDoturCommand::execute(){
	try {
	
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
        
        ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
        
        NameAssignment* nameMap = NULL;
        CountTable* ct = NULL;
        map<string, int> counts;
        if(namefile != "") {	
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
            cluster->readPhylipFile(phylipfile, nameMap);
            delete nameMap;
		}else if (countfile != "") {
            ct = new CountTable();
            ct->readTable(countfile, false, false);
            cluster->readPhylipFile(phylipfile, ct);
            counts = ct->getNameMap();
            delete ct;
        }else {
            cluster->readPhylipFile(phylipfile, nameMap);
        }
        tag = cluster->getTag();
        
		if (m->getControl_pressed()) { delete cluster; return 0; }
		
		list = cluster->getListVector();
		rabund = cluster->getRAbundVector();
								
		if (outputDir == "") { outputDir += util.hasPath(phylipfile); }
		fileroot = outputDir + util.getRootName(util.getSimpleName(phylipfile));
			
        map<string, string> variables; 
        variables["[filename]"] = fileroot;
        variables["[clustertag]"] = tag;
        string sabundFileName = getOutputFileName("sabund", variables);
        string rabundFileName = getOutputFileName("rabund", variables);
        //if (countfile != "") { variables["[tag2]"] = "unique_list"; }
        string listFileName = getOutputFileName("list", variables);
        
        if (countfile == "") {
            util.openOutputFile(sabundFileName,	sabundFile);
            util.openOutputFile(rabundFileName,	rabundFile);
            outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
            outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
            
        }
		util.openOutputFile(listFileName,	listFile);
        outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
		
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		oldRAbund = *rabund;
		oldList = *list;
        bool printHeaders = true;
		
        int estart = time(NULL); int loop = 0;
	
		while ((cluster->getSmallDist() <= cutoff) && (cluster->getNSeqs() > 1)){
			if (m->getControl_pressed()) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close();  util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); }
                listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear();  return 0;  }
		
			cluster->update(cutoff);
	
			float dist = cluster->getSmallDist();
			float rndDist = util.ceilDist(dist, precision);
            
            //cout << loop << '\t' << dist << '\t' << oldList.getNumBins() << endl; loop++;

			if(previousDist <= 0.0000 && dist != previousDist)  { printData("unique", counts, printHeaders);                                }
			else if(rndDist != rndPreviousDist)                 { printData(toString(rndPreviousDist,  length-1), counts, printHeaders);    }
		
			previousDist = dist;
			rndPreviousDist = rndDist;
			oldRAbund = *rabund;
			oldList = *list;
		}
	
		if(previousDist <= 0.0000)          { printData("unique", counts, printHeaders);                            }
		else if(rndPreviousDist<cutoff)     { printData(toString(rndPreviousDist, length-1), counts, printHeaders); }
		
        if (countfile == "") {
            sabundFile.close();
            rabundFile.close();
        }
		listFile.close();
		
		delete cluster;  delete list; delete rabund;
		
		//set list file as new current listfile
		string currentName = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } }
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } }
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } }
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClusterDoturCommand", "execute");
		exit(1);
	}
}
Ejemplo n.º 4
0
int TreeGroupCommand::execute(){
	try {
	
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
		if (format == "sharedfile") {
			InputData input(sharedfile, "sharedfile", Groups);
			SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
			lastLabel = lookup->getLabel();
            Groups = lookup->getNamesGroups();
			
            if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command.\n");   return 0; }
			
			//create treemap class from groupmap for tree class to use
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < Groups.size(); i++) {
                nameMap.insert(Groups[i]);
                gps.insert(Groups[i]);
                groupMap[Groups[i]] = Groups[i];
            }
            ct.createTable(nameMap, groupMap, gps);
			
			//fills tree names with shared files groups
			Treenames = lookup->getNamesGroups();
            
			if (m->getControl_pressed()) { return 0; }
			
			//create tree file
			makeSimsShared(input, lookup, ct);
			
			if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);  }  return 0; }
		}else{
			//read in dist file
			filename = inputfile;
            
            ReadMatrix* readMatrix;
			if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }	
			else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
				
			readMatrix->setCutoff(cutoff);
	
            ListVector* list;
            if(namefile != ""){	
                NameAssignment* nameMap = new NameAssignment(namefile);
                nameMap->readMap();
                readMatrix->read(nameMap);
                list = readMatrix->getListVector();
                delete nameMap;
            }else if (countfile != "") {
                CountTable* ct = new CountTable();
                ct->readTable(countfile, true, false);
                readMatrix->read(ct);
                list = readMatrix->getListVector();
                delete ct;
            }else { NameAssignment* nameMap = NULL; readMatrix->read(nameMap); list = readMatrix->getListVector(); }

			SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
			Treenames.clear();
            
			//make treemap
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < list->getNumBins(); i++) {
                string bin = list->get(i);
                nameMap.insert(bin); 
                gps.insert(bin); 
                groupMap[bin] = bin;
                Treenames.push_back(bin);
            }
            ct.createTable(nameMap, groupMap, gps);
			vector<string> namesGroups = ct.getNamesOfGroups();
			
			if (m->getControl_pressed()) { return 0; }
			
			vector< vector<double> > matrix = makeSimsDist(dMatrix, list->getNumBins());
            delete readMatrix;
            delete dMatrix;
			
			if (m->getControl_pressed()) { return 0; }

			//create a new filename
            map<string, string> variables; 
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
			string outputFile = getOutputFileName("tree",variables);	
			outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
				
            Tree* newTree = new Tree(&ct, matrix, Treenames);
            if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
            else { newTree->assembleTree(); }
 
            if (newTree != NULL) {  newTree->createNewickFile(outputFile);  delete newTree; }
			
			if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n");
		}
				
		//set tree file as new current treefile
		string currentName = "";
		itTypes = outputTypes.find("tree");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); }
		}
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "execute");
		exit(1);
	}
}