int ClusterCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //phylip file given and cutoff not given - use cluster.classic because it uses less memory and is faster if ((format == "phylip") && (cutoff > 10.0)) { m->mothurOutEndLine(); m->mothurOut("You are using a phylip file and no cutoff. I will run cluster.classic to save memory and time."); m->mothurOutEndLine(); //run unique.seqs for deconvolute results string inputString = "phylip=" + distfile; if (namefile != "") { inputString += ", name=" + namefile; } inputString += ", precision=" + toString(precision); inputString += ", method=" + method; if (hard) { inputString += ", hard=T"; } else { inputString += ", hard=F"; } if (sim) { inputString += ", sim=T"; } else { inputString += ", sim=F"; } m->mothurOutEndLine(); m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine(); m->mothurOut("Running command: cluster.classic(" + inputString + ")"); m->mothurOutEndLine(); Command* clusterClassicCommand = new ClusterDoturCommand(inputString); clusterClassicCommand->execute(); delete clusterClassicCommand; m->mothurOut("/------------------------------------------------------------/"); m->mothurOutEndLine(); return 0; } ReadMatrix* read; if (format == "column") { read = new ReadColumnMatrix(columnfile, sim); } //sim indicates whether its a similarity matrix else if (format == "phylip") { read = new ReadPhylipMatrix(phylipfile, sim); } read->setCutoff(cutoff); NameAssignment* nameMap = NULL; if(namefile != ""){ nameMap = new NameAssignment(namefile); nameMap->readMap(); } read->read(nameMap); list = read->getListVector(); matrix = read->getMatrix(); rabund = new RAbundVector(list->getRAbundVector()); delete read; if (m->control_pressed) { //clean up delete list; delete matrix; delete rabund; sabundFile.close();rabundFile.close();listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //create cluster if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); } else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); } else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); } else if(method == "weighted"){ cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method); } tag = cluster->getTag(); if (outputDir == "") { outputDir += m->hasPath(distfile); } fileroot = outputDir + m->getRootName(m->getSimpleName(distfile)); m->openOutputFile(fileroot+ tag + ".sabund", sabundFile); m->openOutputFile(fileroot+ tag + ".rabund", rabundFile); m->openOutputFile(fileroot+ tag + ".list", listFile); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund"); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund"); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list"); time_t estart = time(NULL); float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; oldList = *list; print_start = true; start = time(NULL); loops = 0; double saveCutoff = cutoff; while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){ if (m->control_pressed) { //clean up delete list; delete matrix; delete rabund; delete cluster; sabundFile.close();rabundFile.close();listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } if (print_start && m->isTrue(timing)) { m->mothurOut("Clustering (" + tag + ") dist " + toString(matrix->getSmallDist()) + "/" + toString(m->roundDist(matrix->getSmallDist(), precision)) + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")"); cout.flush(); print_start = false; } loops++; cluster->update(cutoff); float dist = matrix->getSmallDist(); float rndDist; if (hard) { rndDist = m->ceilDist(dist, precision); }else{ rndDist = m->roundDist(dist, precision); } if(previousDist <= 0.0000 && dist != previousDist){ printData("unique"); } else if(rndDist != rndPreviousDist){ printData(toString(rndPreviousDist, length-1)); } previousDist = dist; rndPreviousDist = rndDist; oldRAbund = *rabund; oldList = *list; } if (print_start && m->isTrue(timing)) { m->mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")"); cout.flush(); print_start = false; } if(previousDist <= 0.0000){ printData("unique"); } else if(rndPreviousDist<cutoff){ printData(toString(rndPreviousDist, length-1)); } delete matrix; delete list; delete rabund; delete cluster; sabundFile.close(); rabundFile.close(); listFile.close(); if (saveCutoff != cutoff) { if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } else { saveCutoff = m->roundDist(saveCutoff, precision); } m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); } //set list file as new current listfile string current = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //if (m->isTrue(timing)) { m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine(); //} return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "execute"); exit(1); } }
int HClusterCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } NameAssignment* nameMap = NULL; if(namefile != ""){ nameMap = new NameAssignment(namefile); nameMap->readMap(); } time_t estart = time(NULL); if (!sorted) { read = new ReadCluster(distfile, cutoff, outputDir, true); read->setFormat(format); read->read(nameMap); if (m->control_pressed) { delete read; sabundFile.close(); rabundFile.close(); listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } distfile = read->getOutputFile(); list = read->getListVector(); delete read; }else { list = new ListVector(nameMap->getListVector()); } if (m->control_pressed) { sabundFile.close(); rabundFile.close(); listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort. "); m->mothurOutEndLine(); estart = time(NULL); //list vector made by read contains all sequence names if(list != NULL){ rabund = new RAbundVector(list->getRAbundVector()); }else{ m->mothurOut("Error: no list vector!"); m->mothurOutEndLine(); return 0; } list->printHeaders(listFile); float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; oldList = *list; print_start = true; start = time(NULL); cluster = new HCluster(rabund, list, method, distfile, nameMap, cutoff); vector<seqDist> seqs; seqs.resize(1); // to start loop if (m->control_pressed) { delete cluster; sabundFile.close(); rabundFile.close(); listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } float saveCutoff = cutoff; while (seqs.size() != 0){ seqs = cluster->getSeqs(); //to account for cutoff change in average neighbor if (seqs.size() != 0) { if (seqs[0].dist > cutoff) { break; } } if (m->control_pressed) { delete cluster; sabundFile.close(); rabundFile.close(); listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } for (int i = 0; i < seqs.size(); i++) { //-1 means skip me if (seqs[i].seq1 != seqs[i].seq2) { cutoff = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); if (m->control_pressed) { delete cluster; sabundFile.close(); rabundFile.close(); listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } float rndDist; if (hard) { rndDist = m->ceilDist(seqs[i].dist, precision); }else{ rndDist = m->roundDist(seqs[i].dist, precision); } if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){ printData("unique"); } else if((rndDist != rndPreviousDist)){ printData(toString(rndPreviousDist, length-1)); } previousDist = seqs[i].dist; rndPreviousDist = rndDist; oldRAbund = *rabund; oldList = *list; } } } if (m->control_pressed) { delete cluster; sabundFile.close(); rabundFile.close(); listFile.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } if(previousDist <= 0.0000){ printData("unique"); } else if(rndPreviousDist<cutoff){ printData(toString(rndPreviousDist, length-1)); } sabundFile.close(); rabundFile.close(); listFile.close(); delete cluster; if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } if (saveCutoff != cutoff) { if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } else { saveCutoff = m->roundDist(saveCutoff, precision); } m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); } //set list file as new current listfile string current = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster. "); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "HClusterCommand", "execute"); exit(1); } }
int ClusterDoturCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim); NameAssignment* nameMap = NULL; CountTable* ct = NULL; map<string, int> counts; if(namefile != "") { nameMap = new NameAssignment(namefile); nameMap->readMap(); cluster->readPhylipFile(phylipfile, nameMap); delete nameMap; }else if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, false, false); cluster->readPhylipFile(phylipfile, ct); counts = ct->getNameMap(); delete ct; }else { cluster->readPhylipFile(phylipfile, nameMap); } tag = cluster->getTag(); if (m->getControl_pressed()) { delete cluster; return 0; } list = cluster->getListVector(); rabund = cluster->getRAbundVector(); if (outputDir == "") { outputDir += util.hasPath(phylipfile); } fileroot = outputDir + util.getRootName(util.getSimpleName(phylipfile)); map<string, string> variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = getOutputFileName("rabund", variables); //if (countfile != "") { variables["[tag2]"] = "unique_list"; } string listFileName = getOutputFileName("list", variables); if (countfile == "") { util.openOutputFile(sabundFileName, sabundFile); util.openOutputFile(rabundFileName, rabundFile); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); } util.openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; oldList = *list; bool printHeaders = true; int estart = time(NULL); int loop = 0; while ((cluster->getSmallDist() <= cutoff) && (cluster->getNSeqs() > 1)){ if (m->getControl_pressed()) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close(); util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); } listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } cluster->update(cutoff); float dist = cluster->getSmallDist(); float rndDist = util.ceilDist(dist, precision); //cout << loop << '\t' << dist << '\t' << oldList.getNumBins() << endl; loop++; if(previousDist <= 0.0000 && dist != previousDist) { printData("unique", counts, printHeaders); } else if(rndDist != rndPreviousDist) { printData(toString(rndPreviousDist, length-1), counts, printHeaders); } previousDist = dist; rndPreviousDist = rndDist; oldRAbund = *rabund; oldList = *list; } if(previousDist <= 0.0000) { printData("unique", counts, printHeaders); } else if(rndPreviousDist<cutoff) { printData(toString(rndPreviousDist, length-1), counts, printHeaders); } if (countfile == "") { sabundFile.close(); rabundFile.close(); } listFile.close(); delete cluster; delete list; delete rabund; //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "execute"); exit(1); } }
int TreeGroupCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (format == "sharedfile") { InputData input(sharedfile, "sharedfile", Groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); lastLabel = lookup->getLabel(); Groups = lookup->getNamesGroups(); if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command.\n"); return 0; } //create treemap class from groupmap for tree class to use CountTable ct; set<string> nameMap; map<string, string> groupMap; set<string> gps; for (int i = 0; i < Groups.size(); i++) { nameMap.insert(Groups[i]); gps.insert(Groups[i]); groupMap[Groups[i]] = Groups[i]; } ct.createTable(nameMap, groupMap, gps); //fills tree names with shared files groups Treenames = lookup->getNamesGroups(); if (m->getControl_pressed()) { return 0; } //create tree file makeSimsShared(input, lookup, ct); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } }else{ //read in dist file filename = inputfile; ReadMatrix* readMatrix; if (format == "column") { readMatrix = new ReadColumnMatrix(filename); } else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); } readMatrix->setCutoff(cutoff); ListVector* list; if(namefile != ""){ NameAssignment* nameMap = new NameAssignment(namefile); nameMap->readMap(); readMatrix->read(nameMap); list = readMatrix->getListVector(); delete nameMap; }else if (countfile != "") { CountTable* ct = new CountTable(); ct->readTable(countfile, true, false); readMatrix->read(ct); list = readMatrix->getListVector(); delete ct; }else { NameAssignment* nameMap = NULL; readMatrix->read(nameMap); list = readMatrix->getListVector(); } SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix(); Treenames.clear(); //make treemap CountTable ct; set<string> nameMap; map<string, string> groupMap; set<string> gps; for (int i = 0; i < list->getNumBins(); i++) { string bin = list->get(i); nameMap.insert(bin); gps.insert(bin); groupMap[bin] = bin; Treenames.push_back(bin); } ct.createTable(nameMap, groupMap, gps); vector<string> namesGroups = ct.getNamesOfGroups(); if (m->getControl_pressed()) { return 0; } vector< vector<double> > matrix = makeSimsDist(dMatrix, list->getNumBins()); delete readMatrix; delete dMatrix; if (m->getControl_pressed()) { return 0; } //create a new filename map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile)); string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); Tree* newTree = new Tree(&ct, matrix, Treenames); if (m->getControl_pressed()) { delete newTree; newTree = NULL; } else { newTree->assembleTree(); } if (newTree != NULL) { newTree->createNewickFile(outputFile); delete newTree; } if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n"); } //set tree file as new current treefile string currentName = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "execute"); exit(1); } }