int main()
{
	// detect nuclear particle type
	ReadMatrix dataReader;
	string filename = "nuclear"; 
	string delimiter = ", ";
	MatrixXd data= dataReader.read(filename, delimiter);

	LINE;  
	LDA lda(false);
	lda.train(data);
	LINE;

	cout << "Prediction of the first five labels: " << endl;
	MatrixXd X_ = data.block(0, 0, 5, data.cols() - 1);
	cout << lda.predict(X_).transpose() << endl;
	LINE; LINE;
}
int main()
{
	ReadMatrix dataReader;
	string filename = "dataset";
	string delimiter = ",	"; 
	MatrixXd data = dataReader.read(filename, delimiter);
	
	LINE;
	LAR lar;
	lar.train(data);

	//predict on first 20 X;
	LINE;
	cout << "Prediction of y on first 20 x: " << endl << endl;
	cout << lar.predict(data.block(0, 0, 20, data.cols() - 1)).transpose() << endl<<endl;
	cout << (data.block(0, data.cols() - 1, 20, 1)).transpose() << endl;
	LINE;
}
int main()
{
	// detect nuclear particle type
	ReadMatrix dataReader;
	string filename = "eigenFace";
	string delimiter = ", ";
	MatrixXd data = dataReader.read(filename, delimiter);

	LINE;
	KM km;
	km.train(data);
	LINE;

	cout << "Clusters label of original data: " << endl<<endl;
	MatrixXd clusterLabel = km.clusterLabel(data);
	cout << clusterLabel.transpose() << endl << endl;
	LINE;  
}
Пример #4
0
int ClusterCommand::execute(){
	try {
	
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//phylip file given and cutoff not given - use cluster.classic because it uses less memory and is faster
		if ((format == "phylip") && (cutoff > 10.0)) {
			m->mothurOutEndLine(); m->mothurOut("You are using a phylip file and no cutoff.  I will run cluster.classic to save memory and time."); m->mothurOutEndLine();
			
			//run unique.seqs for deconvolute results
			string inputString = "phylip=" + distfile;
			if (namefile != "") { inputString += ", name=" + namefile; }
			inputString += ", precision=" + toString(precision);
			inputString += ", method=" + method;
			if (hard)	{ inputString += ", hard=T";	}
			else		{ inputString += ", hard=F";	}
			if (sim)	{ inputString += ", sim=T";		}
			else		{ inputString += ", sim=F";		}

			
			m->mothurOutEndLine(); 
			m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine(); 
			m->mothurOut("Running command: cluster.classic(" + inputString + ")"); m->mothurOutEndLine(); 
			
			Command* clusterClassicCommand = new ClusterDoturCommand(inputString);
			clusterClassicCommand->execute();
			delete clusterClassicCommand;
			
			m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine();  

			return 0;
		}
		
		ReadMatrix* read;
		if (format == "column") { read = new ReadColumnMatrix(columnfile, sim); }	//sim indicates whether its a similarity matrix
		else if (format == "phylip") { read = new ReadPhylipMatrix(phylipfile, sim); }
		
		read->setCutoff(cutoff);
		
		NameAssignment* nameMap = NULL;
		if(namefile != ""){	
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
		}
		
		read->read(nameMap);
		list = read->getListVector();
		matrix = read->getMatrix();
		rabund = new RAbundVector(list->getRAbundVector());
		delete read;
		
		if (m->control_pressed) { //clean up
			delete list; delete matrix; delete rabund;
			sabundFile.close();rabundFile.close();listFile.close();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} outputTypes.clear();
			return 0;
		}
		
		//create cluster
		if (method == "furthest")	{	cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
		else if(method == "nearest"){	cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
		else if(method == "average"){	cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);	}
		else if(method == "weighted"){	cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method);	}
		tag = cluster->getTag();
		
		if (outputDir == "") { outputDir += m->hasPath(distfile); }
		fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
		
		m->openOutputFile(fileroot+ tag + ".sabund",	sabundFile);
		m->openOutputFile(fileroot+ tag + ".rabund",	rabundFile);
		m->openOutputFile(fileroot+ tag + ".list",		listFile);
		
		outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
		outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
		outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
		
		
		time_t estart = time(NULL);
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		oldRAbund = *rabund;
		oldList = *list;

		print_start = true;
		start = time(NULL);
		loops = 0;
		double saveCutoff = cutoff;
		
		while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
		
			if (m->control_pressed) { //clean up
				delete list; delete matrix; delete rabund; delete cluster;
				sabundFile.close();rabundFile.close();listFile.close();
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} outputTypes.clear();
				return 0;
			}
		
			if (print_start && m->isTrue(timing)) {
				m->mothurOut("Clustering (" + tag + ") dist " + toString(matrix->getSmallDist()) + "/" 
					+ toString(m->roundDist(matrix->getSmallDist(), precision)) 
					+ "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
				cout.flush();
				print_start = false;
			}

			loops++;

			cluster->update(cutoff);
	
			float dist = matrix->getSmallDist();
			float rndDist;
			if (hard) {
				rndDist = m->ceilDist(dist, precision); 
			}else{
				rndDist = m->roundDist(dist, precision); 
			}

			if(previousDist <= 0.0000 && dist != previousDist){
				printData("unique");
			}
			else if(rndDist != rndPreviousDist){
				printData(toString(rndPreviousDist,  length-1));
			}
		
			previousDist = dist;
			rndPreviousDist = rndDist;
			oldRAbund = *rabund;
			oldList = *list;
		}

		if (print_start && m->isTrue(timing)) {
			m->mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) 
					 + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
			cout.flush();
	 		print_start = false;
		}
		
		if(previousDist <= 0.0000){
			printData("unique");
		}
		else if(rndPreviousDist<cutoff){
			printData(toString(rndPreviousDist, length-1));
		}
		
		delete matrix;
		delete list;
		delete rabund;
		delete cluster;
		
		sabundFile.close();
		rabundFile.close();
		listFile.close();
	
		if (saveCutoff != cutoff) { 
			if (hard)	{  saveCutoff = m->ceilDist(saveCutoff, precision);	}
			else		{	saveCutoff = m->roundDist(saveCutoff, precision);  }

			m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
		}
		
		//set list file as new current listfile
		string current = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
		}
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
		}
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		
		//if (m->isTrue(timing)) {
			m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
		//}
		
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClusterCommand", "execute");
		exit(1);
	}
}
Пример #5
0
int TreeGroupCommand::execute(){
	try {
	
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		if (format == "sharedfile") {
			
			ValidCalculators validCalculator;
		
			for (int i=0; i<Estimators.size(); i++) {
				if (validCalculator.isValidCalculator("treegroup", Estimators[i]) == true) { 
					if (Estimators[i] == "sharedsobs") { 
						treeCalculators.push_back(new SharedSobsCS());
					}else if (Estimators[i] == "sharedchao") { 
						treeCalculators.push_back(new SharedChao1());
					}else if (Estimators[i] == "sharedace") { 
						treeCalculators.push_back(new SharedAce());
					}else if (Estimators[i] == "jabund") { 	
						treeCalculators.push_back(new JAbund());
					}else if (Estimators[i] == "sorabund") { 
						treeCalculators.push_back(new SorAbund());
					}else if (Estimators[i] == "jclass") { 
						treeCalculators.push_back(new Jclass());
					}else if (Estimators[i] == "sorclass") { 
						treeCalculators.push_back(new SorClass());
					}else if (Estimators[i] == "jest") { 
						treeCalculators.push_back(new Jest());
					}else if (Estimators[i] == "sorest") { 
						treeCalculators.push_back(new SorEst());
					}else if (Estimators[i] == "thetayc") { 
						treeCalculators.push_back(new ThetaYC());
					}else if (Estimators[i] == "thetan") { 
						treeCalculators.push_back(new ThetaN());
					}else if (Estimators[i] == "kstest") { 
						treeCalculators.push_back(new KSTest());
					}else if (Estimators[i] == "sharednseqs") { 
						treeCalculators.push_back(new SharedNSeqs());
					}else if (Estimators[i] == "ochiai") { 
						treeCalculators.push_back(new Ochiai());
					}else if (Estimators[i] == "anderberg") { 
						treeCalculators.push_back(new Anderberg());
					}else if (Estimators[i] == "kulczynski") { 
						treeCalculators.push_back(new Kulczynski());
					}else if (Estimators[i] == "kulczynskicody") { 
						treeCalculators.push_back(new KulczynskiCody());
					}else if (Estimators[i] == "lennon") { 
						treeCalculators.push_back(new Lennon());
					}else if (Estimators[i] == "morisitahorn") { 
						treeCalculators.push_back(new MorHorn());
					}else if (Estimators[i] == "braycurtis") { 
						treeCalculators.push_back(new BrayCurtis());
					}else if (Estimators[i] == "whittaker") { 
						treeCalculators.push_back(new Whittaker());
					}else if (Estimators[i] == "odum") { 
						treeCalculators.push_back(new Odum());
					}else if (Estimators[i] == "canberra") { 
						treeCalculators.push_back(new Canberra());
					}else if (Estimators[i] == "structeuclidean") { 
						treeCalculators.push_back(new StructEuclidean());
					}else if (Estimators[i] == "structchord") { 
						treeCalculators.push_back(new StructChord());
					}else if (Estimators[i] == "hellinger") { 
						treeCalculators.push_back(new Hellinger());
					}else if (Estimators[i] == "manhattan") { 
						treeCalculators.push_back(new Manhattan());
					}else if (Estimators[i] == "structpearson") { 
						treeCalculators.push_back(new StructPearson());
					}else if (Estimators[i] == "soergel") { 
						treeCalculators.push_back(new Soergel());
					}else if (Estimators[i] == "spearman") { 
						treeCalculators.push_back(new Spearman());
					}else if (Estimators[i] == "structkulczynski") { 
						treeCalculators.push_back(new StructKulczynski());
					}else if (Estimators[i] == "speciesprofile") { 
						treeCalculators.push_back(new SpeciesProfile());
					}else if (Estimators[i] == "hamming") { 
						treeCalculators.push_back(new Hamming());
					}else if (Estimators[i] == "structchi2") { 
						treeCalculators.push_back(new StructChi2());
					}else if (Estimators[i] == "gower") { 
						treeCalculators.push_back(new Gower());
					}else if (Estimators[i] == "memchi2") { 
						treeCalculators.push_back(new MemChi2());
					}else if (Estimators[i] == "memchord") { 
						treeCalculators.push_back(new MemChord());
					}else if (Estimators[i] == "memeuclidean") { 
						treeCalculators.push_back(new MemEuclidean());
					}else if (Estimators[i] == "mempearson") { 
						treeCalculators.push_back(new MemPearson());
                    }else if (Estimators[i] == "jsd") {
                        treeCalculators.push_back(new JSD());
                    }else if (Estimators[i] == "rjsd") {
                        treeCalculators.push_back(new RJSD());
                    }

				}
			}
			
			//if the users entered no valid calculators don't execute command
			if (treeCalculators.size() == 0) { m->mothurOut("You have given no valid calculators."); m->mothurOutEndLine(); return 0; }
			
			input = new InputData(sharedfile, "sharedfile");
			lookup = input->getSharedRAbundVectors();
			lastLabel = lookup[0]->getLabel();
			
			if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); return 0; }
			
			//used in tree constructor 
			m->runParse = false;
			
			//create treemap class from groupmap for tree class to use
			ct = new CountTable();
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < m->getAllGroups().size(); i++) { 
                nameMap.insert(m->getAllGroups()[i]); 
                gps.insert(m->getAllGroups()[i]); 
                groupMap[m->getAllGroups()[i]] = m->getAllGroups()[i];
            }
            ct->createTable(nameMap, groupMap, gps);
			
			//clear globaldatas old tree names if any
			m->Treenames.clear();
			
			//fills globaldatas tree names
			//m->Treenames = m->getGroups();
            for (int k = 0; k < lookup.size(); k++) {
                m->Treenames.push_back(lookup[k]->getGroup());
            }
		
			if (m->control_pressed) { return 0; }
			
			//create tree file
			makeSimsShared();
			
			if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } return 0; }
		}else{
			//read in dist file
			filename = inputfile;
            
            ReadMatrix* readMatrix;
			if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }	
			else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
				
			readMatrix->setCutoff(cutoff);
	
            ct = NULL;
            nameMap = NULL;
            if(namefile != ""){	
                nameMap = new NameAssignment(namefile);
                nameMap->readMap();
                readMatrix->read(nameMap);
            }else if (countfile != "") {
                ct = new CountTable();
                ct->readTable(countfile, true, false);
                readMatrix->read(ct);
            }else {
                readMatrix->read(nameMap);
            }

			list = readMatrix->getListVector();
			SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
            
            //clear globaldatas old tree names if any
			m->Treenames.clear();
            
			//make treemap
            if (ct != NULL) { delete ct; }
			ct = new CountTable();
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < list->getNumBins(); i++) {
                string bin = list->get(i);
                nameMap.insert(bin); 
                gps.insert(bin); 
                groupMap[bin] = bin;
                m->Treenames.push_back(bin);
            }
            ct->createTable(nameMap, groupMap, gps);
			
			vector<string> namesGroups = ct->getNamesOfGroups();
			m->setGroups(namesGroups);
			
			//used in tree constructor 
			m->runParse = false;
			
			if (m->control_pressed) { return 0; }
			
			vector< vector<double> > matrix = makeSimsDist(dMatrix);
            delete readMatrix;
            delete dMatrix;
			
			if (m->control_pressed) { return 0; }

			//create a new filename
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
			string outputFile = getOutputFileName("tree",variables);	
			outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
				
			Tree* newTree = createTree(matrix);
            
            if (newTree != NULL) {  writeTree(outputFile, newTree); delete newTree; }
			
			if (m->control_pressed) { return 0; }

			m->mothurOut("Tree complete. "); m->mothurOutEndLine();
			
		}
				
		//reset groups parameter
		m->clearGroups(); 
		
		//set tree file as new current treefile
		string current = "";
		itTypes = outputTypes.find("tree");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "execute");
		exit(1);
	}
}
Пример #6
0
int TreeGroupCommand::execute(){
	try {
	
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
		if (format == "sharedfile") {
			InputData input(sharedfile, "sharedfile", Groups);
			SharedRAbundVectors* lookup = input.getSharedRAbundVectors();
			lastLabel = lookup->getLabel();
            Groups = lookup->getNamesGroups();
			
            if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command.\n");   return 0; }
			
			//create treemap class from groupmap for tree class to use
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < Groups.size(); i++) {
                nameMap.insert(Groups[i]);
                gps.insert(Groups[i]);
                groupMap[Groups[i]] = Groups[i];
            }
            ct.createTable(nameMap, groupMap, gps);
			
			//fills tree names with shared files groups
			Treenames = lookup->getNamesGroups();
            
			if (m->getControl_pressed()) { return 0; }
			
			//create tree file
			makeSimsShared(input, lookup, ct);
			
			if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);  }  return 0; }
		}else{
			//read in dist file
			filename = inputfile;
            
            ReadMatrix* readMatrix;
			if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }	
			else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
				
			readMatrix->setCutoff(cutoff);
	
            ListVector* list;
            if(namefile != ""){	
                NameAssignment* nameMap = new NameAssignment(namefile);
                nameMap->readMap();
                readMatrix->read(nameMap);
                list = readMatrix->getListVector();
                delete nameMap;
            }else if (countfile != "") {
                CountTable* ct = new CountTable();
                ct->readTable(countfile, true, false);
                readMatrix->read(ct);
                list = readMatrix->getListVector();
                delete ct;
            }else { NameAssignment* nameMap = NULL; readMatrix->read(nameMap); list = readMatrix->getListVector(); }

			SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
			Treenames.clear();
            
			//make treemap
			CountTable ct;
            set<string> nameMap;
            map<string, string> groupMap;
            set<string> gps;
            for (int i = 0; i < list->getNumBins(); i++) {
                string bin = list->get(i);
                nameMap.insert(bin); 
                gps.insert(bin); 
                groupMap[bin] = bin;
                Treenames.push_back(bin);
            }
            ct.createTable(nameMap, groupMap, gps);
			vector<string> namesGroups = ct.getNamesOfGroups();
			
			if (m->getControl_pressed()) { return 0; }
			
			vector< vector<double> > matrix = makeSimsDist(dMatrix, list->getNumBins());
            delete readMatrix;
            delete dMatrix;
			
			if (m->getControl_pressed()) { return 0; }

			//create a new filename
            map<string, string> variables; 
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile));
			string outputFile = getOutputFileName("tree",variables);	
			outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
				
            Tree* newTree = new Tree(&ct, matrix, Treenames);
            if (m->getControl_pressed()) { delete newTree; newTree = NULL; }
            else { newTree->assembleTree(); }
 
            if (newTree != NULL) {  newTree->createNewickFile(outputFile);  delete newTree; }
			
			if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n");
		}
				
		//set tree file as new current treefile
		string currentName = "";
		itTypes = outputTypes.find("tree");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); }
		}
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "execute");
		exit(1);
	}
}