// Builds a parser that buckets the sequences of fastaFile by the group that
// groupFile assigns to them.
//
//   groupFile      - group file handed to GroupMap.
//   fastaFile      - fasta file read sequence by sequence.
//   groupsSelected - groups to keep; when empty every group in groupFile is kept.
//
// Errors are reported by setting m->control_pressed: when readMap() returns 1,
// or when a fasta sequence has no entry in the group file.
SequenceParser::SequenceParser(string groupFile, string fastaFile, vector<string> groupsSelected) {
	try {

		m = MothurOut::getInstance();

		//read group file
		groupMap = new GroupMap(groupFile);
		int error = groupMap->readMap();
		if (error == 1) { m->control_pressed = true; }

		//initialize maps
		vector<string> namesOfGroups = groupMap->getNamesOfGroups();
		set<string> selectedGroups;
		if (groupsSelected.size() != 0) {
			SharedUtil util;
			util.setGroups(groupsSelected, namesOfGroups);
			namesOfGroups = groupsSelected;
		}

		for (int i = 0; i < namesOfGroups.size(); i++) {
			vector<Sequence> temp;
			seqs[namesOfGroups[i]] = temp;
			selectedGroups.insert(namesOfGroups[i]);
		}

		//read fasta file making sure each sequence is in the group file
		ifstream in;
		m->openInputFile(fastaFile, in);

		while (!in.eof()) {

			if (m->control_pressed) { break; }

			Sequence seq(in); m->gobble(in);

			if (seq.getName() != "") {

				string group = groupMap->getGroup(seq.getName());

				//the "not found" test has to come before the selection test:
				//"not found" is never a selected group, so nesting it inside the
				//selection test made this error unreachable.
				if (group == "not found") {
					error = 1;
					m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your groupfile, please correct."); m->mothurOutEndLine();
				}else if (selectedGroups.count(group) != 0) { //this is a group we want
					seqs[group].push_back(seq);
				}
			}
		}
		in.close();

		if (error == 1) { m->control_pressed = true; }
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceParser", "SequenceParser");
		exit(1);
	}
}
// Writes a new group file in which every sequence of the user's selected
// groups is assigned the value designMap returns for its original group.
//
//   designMap - queried with designMap->get(group) for each selected group.
//
// The output file name is registered in outputTypes["group"] and outputNames.
// m->control_pressed is set when the group file fails to read or when a
// selected group has no entry in designMap. Always returns 0.
int MergeGroupsCommand::processGroupFile(DesignMap*& designMap){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
		map<string, string> variables;
		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
		variables["[extension]"] = m->getExtension(groupfile);
		string outputFileName = getOutputFileName("group", variables);
		outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);

		ofstream out;
		m->openOutputFile(outputFileName, out);

		//read groupfile
		GroupMap groupMap(groupfile);
		//readMap() reports failure with 1; flag it instead of ignoring it so the
		//loop below stops and the caller can see the run did not succeed
		if (groupMap.readMap() == 1) { m->control_pressed = true; }

		//fill Groups - checks for "all" and for any typo groups
		SharedUtil util;
		vector<string> nameGroups = groupMap.getNamesOfGroups();
		util.setGroups(Groups, nameGroups);

		vector<string> namesOfSeqs = groupMap.getNamesSeqs();
		bool error = false;

		for (int i = 0; i < namesOfSeqs.size(); i++) {

			if (m->control_pressed) { break; }

			string thisGroup = groupMap.getGroup(namesOfSeqs[i]);

			//are you in a group the user wants
			if (m->inUsersGroups(thisGroup, Groups)) {
				string thisGrouping = designMap->get(thisGroup);

				if (thisGrouping == "not found") {
					m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine();
					error = true;
				}else {
					//the stream is closed below, so a per-line flush is not needed
					out << namesOfSeqs[i] << '\t' << thisGrouping << '\n';
				}
			}
		}

		if (error) { m->control_pressed = true; }

		out.close();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
		exit(1);
	}
}
int GetOtusCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } groupMap = new GroupMap(groupfile); groupMap->readMap(); //get groups you want to get if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); } //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); vector<string> gNamesOfGroups = groupMap->getNamesOfGroups(); util->setGroups(Groups, gNamesOfGroups); groupMap->setNamesOfGroups(gNamesOfGroups); delete util; if (m->control_pressed) { delete groupMap; return 0; } //read through the list file keeping any otus that contain any sequence from the groups selected readListGroup(); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set list file as new current listfile string current = ""; itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } } return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int SplitGroupCommand::runNameGroup(){ try { SequenceParser* parser; if (namefile == "") { parser = new SequenceParser(groupfile, fastafile); } else { parser = new SequenceParser(groupfile, fastafile, namefile); } if (m->control_pressed) { delete parser; return 0; } vector<string> namesGroups = parser->getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); string fastafileRoot = outputDir + m->getRootName(m->getSimpleName(fastafile)); string namefileRoot = outputDir + m->getRootName(m->getSimpleName(namefile)); m->mothurOutEndLine(); for (int i = 0; i < Groups.size(); i++) { m->mothurOut("Processing group: " + Groups[i]); m->mothurOutEndLine(); map<string, string> variables; variables["[filename]"] = fastafileRoot; variables["[group]"] = Groups[i]; string newFasta = getOutputFileName("fasta",variables); variables["[filename]"] = namefileRoot; string newName = getOutputFileName("name",variables); parser->getSeqs(Groups[i], newFasta, "/ab=", "/", false); outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta); if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (namefile != "") { parser->getNameMap(Groups[i], newName); outputNames.push_back(newName); outputTypes["name"].push_back(newName); } if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } } delete parser; return 0; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "runNameGroup"); exit(1); } }
int MetaStatsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //just used to convert files to test metastats online /****************************************************/ bool convertInputToShared = false; convertSharedToInput = false; if (convertInputToShared) { convertToShared(sharedfile); return 0; } /****************************************************/ designMap = new GroupMap(designfile); designMap->readDesignMap(); input = new InputData(sharedfile, "sharedfile"); lookup = input->getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; //setup the pairwise comparions of sets for metastats //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; //make sure sets are all in designMap SharedUtil* util = new SharedUtil(); vector<string> dGroups = designMap->getNamesOfGroups(); util->setGroups(Sets, dGroups); delete util; int numGroups = Sets.size(); for (int a=0; a<numGroups; a++) { for (int l = 0; l < a; l++) { vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]); namesOfGroupCombos.push_back(groups); } } //only 1 combo if (numGroups == 2) { processors = 1; } else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. 
Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } if(processors != 1){ int remainingPairs = namesOfGroupCombos.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } } //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); } if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); //restore real lastlabel to save below lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) 
{ m->mothurRemove(outputNames[i]); } return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } //reset groups parameter m->clearGroups(); delete input; delete designMap; if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "execute"); exit(1); } }
int GetSharedOTUCommand::runShared() { try { InputData input(sharedfile, "sharedfile"); vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); if (Groups.size() == 0) { Groups = m->getGroups(); //make string for outputfile name userGroups = "unique."; for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } userGroups = userGroups.substr(0, userGroups.length()-1); }else { //sanity check for group names SharedUtil util; vector<string> allGroups = m->getAllGroups(); util.setGroups(Groups, allGroups); } //put groups in map to find easier for(int i = 0; i < Groups.size(); i++) { groupFinder[Groups[i]] = Groups[i]; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); process(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); } if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); process(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); //restore real lastlabel to save below lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); //get next line to process 
//prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input.getSharedRAbundVectors(); } if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); process(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } //reset groups parameter m->clearGroups(); return 0; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "runShared"); exit(1); } }
int GetSharedOTUCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } if ( sharedfile != "") { runShared(); } else { m->setGroups(Groups); groupMap = new GroupMap(groupfile); int error = groupMap->readMap(); if (error == 1) { delete groupMap; return 0; } if (m->control_pressed) { delete groupMap; return 0; } if (Groups.size() == 0) { Groups = groupMap->getNamesOfGroups(); //make string for outputfile name userGroups = "unique."; for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } userGroups = userGroups.substr(0, userGroups.length()-1); }else{ //sanity check for group names SharedUtil util; vector<string> namesOfGroups = groupMap->getNamesOfGroups(); util.setGroups(Groups, namesOfGroups); groupMap->setNamesOfGroups(namesOfGroups); } //put groups in map to find easier for(int i = 0; i < Groups.size(); i++) { groupFinder[Groups[i]] = Groups[i]; } if (fastafile != "") { ifstream inFasta; m->openInputFile(fastafile, inFasta); while(!inFasta.eof()) { if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; } Sequence seq(inFasta); m->gobble(inFasta); if (seq.getName() != "") { seqs.push_back(seq); } } inFasta.close(); } ListVector* lastlist = NULL; string lastLabel = ""; //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set<string> processedLabels; set<string> userLabels = labels; ifstream in; m->openInputFile(listfile, in); //as long as you are not at the end of the file or done wih the lines you want while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { if (lastlist != NULL) { delete lastlist; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete groupMap; return 0; } list = new ListVector(in); if(allLines == 1 || labels.count(list->getLabel()) == 1){ m->mothurOut(list->getLabel()); process(list); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); m->mothurOut(lastlist->getLabel()); process(lastlist); processedLabels.insert(lastlist->getLabel()); userLabels.erase(lastlist->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); } lastLabel = list->getLabel(); if (lastlist != NULL) { delete lastlist; } lastlist = list; } in.close(); //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". 
Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { m->mothurOut(lastlist->getLabel()); process(lastlist); processedLabels.insert(lastlist->getLabel()); userLabels.erase(lastlist->getLabel()); } //reset groups parameter m->clearGroups(); if (lastlist != NULL) { delete lastlist; } if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete groupMap; return 0; } } //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } if (output == "accnos") { itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "execute"); exit(1); } }
int CollectSharedCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //if the users entered no valid calculators don't execute command if (cDisplays.size() == 0) { return 0; } for(int i=0;i<cDisplays.size();i++){ cDisplays[i]->setAll(all); } input = new InputData(sharedfile, "sharedfile"); order = input->getSharedOrderVector(); string lastLabel = order->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; //set users groups SharedUtil* util = new SharedUtil(); Groups = m->getGroups(); vector<string> allGroups = m->getAllGroups(); util->setGroups(Groups, allGroups, "collect"); m->setGroups(Groups); m->setAllGroups(allGroups); delete util; while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; } delete order; delete input; m->clearGroups(); return 0; } if(allLines == 1 || labels.count(order->getLabel()) == 1){ m->mothurOut(order->getLabel()); m->mothurOutEndLine(); //create collectors curve cCurve = new Collect(order, cDisplays); cCurve->getSharedCurve(freq); delete cCurve; processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); } //you have a label the user want that is smaller than this label and the last label has not already been processed if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = order->getLabel(); delete order; order = input->getSharedOrderVector(lastLabel); m->mothurOut(order->getLabel()); m->mothurOutEndLine(); //create collectors curve cCurve = new Collect(order, cDisplays); cCurve->getSharedCurve(freq); delete cCurve; processedLabels.insert(order->getLabel()); 
userLabels.erase(order->getLabel()); //restore real lastlabel to save below order->setLabel(saveLabel); } lastLabel = order->getLabel(); //get next line to process delete order; order = input->getSharedOrderVector(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; } m->clearGroups(); delete input; return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { if (order != NULL) { delete order; } order = input->getSharedOrderVector(lastLabel); m->mothurOut(order->getLabel()); m->mothurOutEndLine(); cCurve = new Collect(order, cDisplays); cCurve->getSharedCurve(freq); delete cCurve; if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; } delete order; delete input; m->clearGroups(); return 0; } delete order; } for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; } //reset groups parameter m->clearGroups(); delete input; m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CollectSharedCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int SplitGroupCommand::runCount(){ try { CountTable ct; ct.readTable(countfile, true, false); if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; } if (m->control_pressed) { return 0; } vector<string> namesGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); //fill filehandles with neccessary ofstreams map<string, string> ffiles; //group -> filename map<string, string> cfiles; //group -> filename for (int i=0; i<Groups.size(); i++) { ofstream ftemp, ctemp; map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); variables["[group]"] = Groups[i]; string newFasta = getOutputFileName("fasta",variables); outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta); ffiles[Groups[i]] = newFasta; m->openOutputFile(newFasta, ftemp); ftemp.close(); variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile)); string newCount = getOutputFileName("count",variables); outputNames.push_back(newCount); outputTypes["count"].push_back(newCount); cfiles[Groups[i]] = newCount; m->openOutputFile(newCount, ctemp); ctemp << "Representative_Sequence\ttotal\t" << Groups[i] << endl; ctemp.close(); } ifstream in; m->openInputFile(fastafile, in); while (!in.eof()) { Sequence seq(in); m->gobble(in); if (m->control_pressed) { break; } if (seq.getName() != "") { vector<string> thisSeqsGroups = ct.getGroups(seq.getName()); for (int i = 0; i < thisSeqsGroups.size(); i++) { if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print ofstream outf, outc; m->openOutputFileAppend(ffiles[thisSeqsGroups[i]], outf); seq.printSequence(outf); outf.close(); int numSeqs = ct.getGroupCount(seq.getName(), thisSeqsGroups[i]); 
m->openOutputFileAppend(cfiles[thisSeqsGroups[i]], outc); outc << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl; outc.close(); } } } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "runCount"); exit(1); } }
// Builds a parser that buckets the sequences of fastaFile by group, using
// nameFile to expand every fasta sequence into the names it represents.
//
//   groupFile      - group file handed to GroupMap.
//   fastaFile      - fasta file read sequence by sequence.
//   nameFile       - name file read with m->readNames().
//   groupsSelected - groups to keep; when empty every group in groupFile is kept.
//
// For every group a fasta sequence touches, the first name of that group
// becomes the representative stored in seqs and nameMapPerGroup.
// Errors are reported by setting m->control_pressed: when readMap() returns 1,
// when a name of the name file has no entry in the group file, or when a
// fasta sequence is missing from the name file.
SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFile, vector<string> groupsSelected) {
	try {

		m = MothurOut::getInstance();

		//read group file
		groupMap = new GroupMap(groupFile);
		int error = groupMap->readMap();
		if (error == 1) { m->control_pressed = true; }

		//initialize maps
		vector<string> namesOfGroups = groupMap->getNamesOfGroups();
		set<string> selectedGroups;
		if (groupsSelected.size() != 0) {
			SharedUtil util;
			util.setGroups(groupsSelected, namesOfGroups);
			namesOfGroups = groupsSelected;
		}

		for (int i = 0; i < namesOfGroups.size(); i++) {
			vector<Sequence> temp;
			map<string, string> tempMap;
			seqs[namesOfGroups[i]] = temp;
			nameMapPerGroup[namesOfGroups[i]] = tempMap;
			selectedGroups.insert(namesOfGroups[i]);
		}

		map<string, string>::iterator it;
		map<string, string> nameMap;
		m->readNames(nameFile, nameMap);

		//read fasta file making sure each sequence is in the group file
		ifstream in;
		m->openInputFile(fastaFile, in);

		while (!in.eof()) {

			if (m->control_pressed) { break; }

			Sequence seq(in); m->gobble(in);

			if (seq.getName() != "") {

				it = nameMap.find(seq.getName());

				if (it != nameMap.end()) { //in namefile
					vector<string> names;
					string secondCol = it->second;
					m->splitAtChar(secondCol, names, ',');

					map<string, string> splitMap; //group -> name1,name2,...
					map<string, string>::iterator itSplit;
					for (int i = 0; i < names.size(); i++) {
						string group = groupMap->getGroup(names[i]);

						//the "not found" test has to come before the selection test:
						//"not found" is never a selected group, so nesting it inside
						//the selection test made this error unreachable.
						if (group == "not found") {
							error = 1;
							m->mothurOut("[ERROR]: " + names[i] + " is in your names file and not in your group file, please correct.\n");
						}else if (selectedGroups.count(group) != 0) { //this is a group we want
							allSeqsMap[names[i]] = names[0];

							itSplit = splitMap.find(group);
							if (itSplit != splitMap.end()) { //adding seqs to this group
								(itSplit->second) += "," + names[i];
							}else { //first sighting of this group
								splitMap[group] = names[i];
							}
						}
					}

					//fill nameMapPerGroup - holds all lines in namefile separated by group
					for (itSplit = splitMap.begin(); itSplit != splitMap.end(); itSplit++) {
						//grab first name: everything before the first ',' (find() gives
						//npos when there is no ',', which makes substr() take the whole string)
						string firstName = (itSplit->second).substr(0, (itSplit->second).find(','));

						//group1 -> seq1 -> seq1,seq2,seq3
						nameMapPerGroup[itSplit->first][firstName] = itSplit->second;
						seqs[itSplit->first].push_back(Sequence(firstName, seq.getAligned()));
					}

				}else {
					error = 1;
					m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your name file, please correct.\n");
				}
			}
		}
		in.close();

		if (error == 1) { m->control_pressed = true; }
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceParser", "SequenceParser");
		exit(1);
	}
}
// Merges the group columns of the count file into one column per treatment
// returned by designMap->getCategory(), combining the counts of the groups of
// a treatment with mergeAbund(). Sequences whose merged total is 0 are left
// out of the new table and removed from the fasta file through remove.seqs.
// The new table is printed to the "count" output file. Always returns 0;
// errors are reported by setting m->control_pressed.
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
	try {
		CountTable countTable;
		if (!countTable.testGroups(countfile)) {
			m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n");
			m->control_pressed = true;
			return 0;
		}

		//read countTable
		countTable.readTable(countfile, true, false);

		//fill Groups - checks for "all" and for any typo groups
		SharedUtil util;
		vector<string> nameGroups = countTable.getNamesOfGroups();
		util.setGroups(Groups, nameGroups);

		vector<string> dnamesGroups = designMap->getNamesGroups();

		//sanity check - is every group in counttable also in designmap
		//NOTE(review): the check only runs when both files list the same number of
		//groups; a size mismatch is not reported here - confirm that is intended.
		bool error = false;
		const bool sameNumberOfGroups = (nameGroups.size() == dnamesGroups.size());
		for (int g = 0; sameNumberOfGroups && !error && g < nameGroups.size(); g++) {
			if (m->control_pressed) { break; }
			error = !m->inUsersGroups(nameGroups[g], dnamesGroups);
		}
		if (error) {
			m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n");
			m->control_pressed = true;
			return 0;
		}

		//user selected groups - remove some groups from table
		if (Groups.size() != nameGroups.size()) {
			for (int g = 0; g < nameGroups.size(); g++) {
				if (m->inUsersGroups(nameGroups[g], Groups)) { continue; }
				countTable.removeGroup(nameGroups[g]);
			}
		}

		//ask again in case order changed
		nameGroups = countTable.getNamesOfGroups();
		int numGroups = nameGroups.size();

		//create new table with one group per treatment
		CountTable newTable;
		vector<string> treatments = designMap->getCategory();
		map<string, vector<int> > clearedMap;
		for (int t = 0; t < treatments.size(); t++) {
			newTable.addGroup(treatments[t]);
			vector<int> noCounts;
			clearedMap[treatments[t]] = noCounts;
		}
		treatments = newTable.getNamesOfGroups();

		set<string> namesToRemove;
		vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
		for (int s = 0; s < namesOfSeqs.size(); s++) {

			if (m->control_pressed) { break; }

			//sort this sequence's per-group counts into their treatments
			vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[s]);
			map<string, vector<int> > thisSeqsMap = clearedMap;
			for (int j = 0; j < numGroups; j++) {
				string treatment = designMap->get(nameGroups[j]);
				thisSeqsMap[treatment].push_back(thisSeqsCounts[j]);
			}

			//create new counts for seq for new table
			vector<int> newCounts;
			int totalAbund = 0;
			for (int j = 0; j < treatments.size(); j++){
				int abund = mergeAbund(thisSeqsMap[treatments[j]]);
				newCounts.push_back(abund); //order matters, add in count for each treatment in new table.
				totalAbund += abund;
			}

			//add seq to new table
			if (totalAbund != 0)	{ newTable.push_back(namesOfSeqs[s], newCounts);	}
			else					{ namesToRemove.insert(namesOfSeqs[s]);				}
		}

		if (error) { m->control_pressed = true; return 0; }

		//remove sequences zeroed out by median method
		if (namesToRemove.size() != 0) {
			//print names
			ofstream out;
			string accnosFile = "accnosFile.temp";
			m->openOutputFile(accnosFile, out);

			//output to .accnos file
			set<string>::iterator itName = namesToRemove.begin();
			while (itName != namesToRemove.end()) {
				if (m->control_pressed) {  out.close(); m->mothurRemove(accnosFile); return 0; }
				out << *itName << endl;
				itName++;
			}
			out.close();

			//run remove.seqs
			string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;

			m->mothurOut("/******************************************/"); m->mothurOutEndLine();
			m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
			m->mothurCalling = true;

			Command* removeCommand = new RemoveSeqsCommand(inputString);
			removeCommand->execute();

			map<string, vector<string> > filenames = removeCommand->getOutputFiles();

			delete removeCommand;
			m->mothurCalling = false;
			m->mothurOut("/******************************************/"); m->mothurOutEndLine();

			m->mothurRemove(accnosFile);
		}

		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
		map<string, string> variables;
		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
		variables["[extension]"] = m->getExtension(countfile);
		string outputFileName = getOutputFileName("count", variables);
		outputTypes["count"].push_back(outputFileName);
		outputNames.push_back(outputFileName);

		newTable.printTable(outputFileName);

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "MergeGroupsCommand", "processCountFile");
		exit(1);
	}
}
int HomovaCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //read design file designMap = new GroupMap(designFileName); designMap->readDesignMap(); if (outputDir == "") { outputDir = m->hasPath(phylipFileName); } //read in distance matrix and square it ReadPhylipVector readMatrix(phylipFileName); vector<string> sampleNames = readMatrix.read(distanceMatrix); if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets SharedUtil util; vector<string> dGroups = designMap->getNamesOfGroups(); util.setGroups(Sets, dGroups); for(int i=0;i<distanceMatrix.size();i++){ if (m->control_pressed) { delete designMap; return 0; } string group = designMap->getGroup(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }else if (!m->inUsersGroups(group, Sets)){ //not in set we want remove it //remove from all other rows for(int j=0;j<distanceMatrix.size();j++){ distanceMatrix[j].erase(distanceMatrix[j].begin()+i); } distanceMatrix.erase(distanceMatrix.begin()+i); sampleNames.erase(sampleNames.begin()+i); i--; } } } for(int i=0;i<distanceMatrix.size();i++){ for(int j=0;j<i;j++){ distanceMatrix[i][j] *= distanceMatrix[i][j]; } } //link designMap to rows/columns in distance matrix map<string, vector<int> > origGroupSampleMap; for(int i=0;i<sampleNames.size();i++){ string group = designMap->getGroup(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }else { origGroupSampleMap[group].push_back(i); } } int numGroups = origGroupSampleMap.size(); if (m->control_pressed) { delete designMap; return 0; } //create a new filename ofstream HOMOVAFile; map<string, string> variables; variables["[filename]"] = outputDir + 
m->getRootName(m->getSimpleName(phylipFileName)); string HOMOVAFileName = getOutputFileName("homova", variables); m->openOutputFile(HOMOVAFileName, HOMOVAFile); outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName); HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl; m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n"); double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha); if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){ int numCombos = numGroups * (numGroups-1) / 2; double pairwiseAlpha = experimentwiseAlpha / (double) numCombos; map<string, vector<int> >::iterator itA; map<string, vector<int> >::iterator itB; for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){ itB = itA;itB++; for(;itB!=origGroupSampleMap.end();itB++){ map<string, vector<int> > pairwiseGroupSampleMap; pairwiseGroupSampleMap[itA->first] = itA->second; pairwiseGroupSampleMap[itB->first] = itB->second; runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha); } } HOMOVAFile << endl; m->mothurOutEndLine(); m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n'); } else{ m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); } m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n"); delete designMap; m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int RemoveRareCommand::processList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile); string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile); ofstream out, outGroup; m->openOutputFile(outputFileName, out); bool wroteSomething = false; //you must provide a label because the names in the listfile need to be consistent string thisLabel = ""; if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); } else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); } else { thisLabel = *labels.begin(); } InputData input(listfile, "list"); ListVector* list = input.getListVector(); //get first one or the one we want if (thisLabel != "") { //use smart distancing set<string> userLabels; userLabels.insert(thisLabel); set<string> processedLabels; string lastLabel = list->getLabel(); while((list != NULL) && (userLabels.size() != 0)) { if(userLabels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); delete list; list = input.getListVector(lastLabel); break; } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } if (userLabels.size() != 0) { m->mothurOut("Your file does not include the label " + thisLabel + ". 
I will use " + lastLabel + "."); m->mothurOutEndLine(); list = input.getListVector(lastLabel); } } //if groupfile is given then use it GroupMap* groupMap; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); SharedUtil util; vector<string> namesGroups = groupMap->getNamesOfGroups(); util.setGroups(Groups, namesGroups); m->openOutputFile(outputGroupFileName, outGroup); } if (list != NULL) { //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list->get(i); vector<string> names; string saveBinNames = binnames; m->splitAtComma(binnames, names); vector<string> newGroupFile; if (groupfile != "") { vector<string> newNames; saveBinNames = ""; for(int k = 0; k < names.size(); k++) { string group = groupMap->getGroup(names[k]); if (m->inUsersGroups(group, Groups)) { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); saveBinNames += names[k] + ","; } } names = newNames; saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); } if (names.size() > nseqs) { //keep bin newList.push_back(saveBinNames); for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.print(out); } } out.close(); if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, 
"RemoveRareCommand", "processList"); exit(1); } }
int RemoveGroupsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //get groups you want to remove if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all vector<string> namesGroups = groupMap->getNamesOfGroups(); vector<string> checkedGroups; for (int i = 0; i < Groups.size(); i++) { if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); } else { m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); } } if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; } else { Groups = checkedGroups; m->setGroups(Groups); } //fill names with names of sequences that are from the groups we want to remove fillNames(); delete groupMap; }else if (countfile != ""){ if ((fastafile != "") || (listfile != "") || (taxfile != "")) { m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } CountTable ct; ct.readTable(countfile, true, false); if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } vector<string> gNamesOfGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, gNamesOfGroups); vector<string> namesOfSeqs = ct.getNamesOfSeqs(); sort(Groups.begin(), Groups.end()); for (int i = 0; i < namesOfSeqs.size(); i++) { vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]); if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you names.insert(namesOfSeqs[i]); } } } if (m->control_pressed) { return 0; } //read through the correct file and output lines you want to keep if 
(namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (groupfile != "") { readGroup(); } if (countfile != "") { readCount(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } if (designfile != "") { readDesign(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); } } itTypes = outputTypes.find("design"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) 
{ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "execute"); exit(1); } }
int CorrAxesCommand::getMetadata(){ try { vector<string> groupNames; ifstream in; m->openInputFile(metadatafile, in); string headerLine = m->getline(in); m->gobble(in); istringstream iss (headerLine,istringstream::in); //read the first label, because it refers to the groups string columnLabel; iss >> columnLabel; m->gobble(iss); //save names of columns you are reading while (!iss.eof()) { iss >> columnLabel; m->gobble(iss); metadataLabels.push_back(columnLabel); } int count = metadataLabels.size(); //read rest of file while (!in.eof()) { if (m->control_pressed) { in.close(); return 0; } string group = ""; in >> group; m->gobble(in); groupNames.push_back(group); SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector(); tempLookup->setGroup(group); tempLookup->setLabel("1"); for (int i = 0; i < count; i++) { float temp = 0.0; in >> temp; tempLookup->push_back(temp, group); } lookupFloat.push_back(tempLookup); m->gobble(in); } in.close(); //remove any groups the user does not want, and set globaldata->groups with only valid groups SharedUtil* util; util = new SharedUtil(); Groups = m->getGroups(); util->setGroups(Groups, groupNames); m->setGroups(Groups); for (int i = 0; i < lookupFloat.size(); i++) { //if this sharedrabund is not from a group the user wants then delete it. if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { delete lookupFloat[i]; lookupFloat[i] = NULL; lookupFloat.erase(lookupFloat.begin()+i); i--; } } delete util; return 0; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "getMetadata"); exit(1); } }
int ParsimonyCommand::execute() { try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //randomtree will tell us if user had their own treefile or if they just want the random distribution //user has entered their own tree if (randomtree == "") { m->setTreeFile(treefile); TreeReader* reader; if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); } else { reader = new TreeReader(treefile, countfile); } T = reader->getTrees(); ct = T[0]->getCountTable(); delete reader; if(outputDir == "") { outputDir += m->hasPath(treefile); } map<string, string> variables; variables["[filename]"] = outputDir + m->getSimpleName(treefile) + "."; output = new ColumnFile(getOutputFileName("parsimony",variables), itersString); outputNames.push_back(getOutputFileName("parsimony",variables)); outputTypes["parsimony"].push_back(getOutputFileName("parsimony",variables)); sumFile = getOutputFileName("psummary",variables); m->openOutputFile(sumFile, outSum); outputNames.push_back(sumFile); outputTypes["psummary"].push_back(sumFile); }else { //user wants random distribution getUserInput(); if(outputDir == "") { outputDir += m->hasPath(randomtree); } output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString); outputNames.push_back(outputDir+ m->getSimpleName(randomtree)); outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree)); } //set users groups to analyze SharedUtil util; vector<string> mGroups = m->getGroups(); vector<string> tGroups = ct->getNamesOfGroups(); util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze util.getCombos(groupComb, mGroups, numComp); m->setGroups(mGroups); if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } Parsimony pars; counter = 0; Progress* reading; reading = new Progress("Comparing to random:", iters); if (m->control_pressed) { delete reading; delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete 
T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get pscore for users tree userData.resize(numComp,0); //data = AB, AC, BC, ABC. randomData.resize(numComp,0); //data = AB, AC, BC, ABC. rscoreFreq.resize(numComp); uscoreFreq.resize(numComp); rCumul.resize(numComp); uCumul.resize(numComp); userTreeScores.resize(numComp); UScoreSig.resize(numComp); if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC. if (m->control_pressed) { delete reading; delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output scores for each combination for(int k = 0; k < numComp; k++) { //update uscoreFreq map<int,double>::iterator it = uscoreFreq[k].find(userData[k]); if (it == uscoreFreq[k].end()) {//new score uscoreFreq[k][userData[k]] = 1; }else{ uscoreFreq[k][userData[k]]++; } //add users score to valid scores validScores[userData[k]] = userData[k]; //save score for summary file userTreeScores[k].push_back(userData[k]); } } //get pscores for random trees for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users randT = new Tree(ct); //create random relationships between nodes randT->assembleRandomTree(); //get pscore of random tree randomData = pars.getValues(randT, processors, outputDir); if (m->control_pressed) { delete reading; delete output; delete randT; if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } m->clearGroups(); return 0; } for(int r = 
0; r < numComp; r++) { //add trees pscore to map of scores map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]); if (it != rscoreFreq[r].end()) {//already have that score rscoreFreq[r][randomData[r]]++; }else{//first time we have seen this score rscoreFreq[r][randomData[r]] = 1; } //add randoms score to validscores validScores[randomData[r]] = randomData[r]; } //update progress bar reading->update(j); delete randT; } }else { //get pscores for random trees for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users randT = new Tree(ct); //create random relationships between nodes randT->assembleRandomTree(); if (m->control_pressed) { delete reading; delete output; delete randT; delete ct; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //get pscore of random tree randomData = pars.getValues(randT, processors, outputDir); if (m->control_pressed) { delete reading; delete output; delete randT; delete ct; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } for(int r = 0; r < numComp; r++) { //add trees pscore to map of scores map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]); if (it != rscoreFreq[r].end()) {//already have that score rscoreFreq[r][randomData[r]]++; }else{//first time we have seen this score rscoreFreq[r][randomData[r]] = 1; } //add randoms score to validscores validScores[randomData[r]] = randomData[r]; } //update progress bar reading->update(j); delete randT; } } for(int a = 0; a < numComp; a++) { float rcumul = 0.0000; float ucumul = 0.0000; //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. 
for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { if (randomtree == "") { map<int,double>::iterator it2 = uscoreFreq[a].find(it->first); //user data has that score if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; } else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score //make uCumul map uCumul[a][it->first] = ucumul; } //make rscoreFreq map and rCumul map<int,double>::iterator it2 = rscoreFreq[a].find(it->first); //get percentage of random trees with that info if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; } else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score rCumul[a][it->first] = rcumul; } //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file for (int h = 0; h < userTreeScores[a].size(); h++) { UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]); } } if (m->control_pressed) { delete reading; delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //finish progress bar reading->finish(); delete reading; printParsimonyFile(); if (randomtree == "") { printUSummaryFile(); } delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "execute"); exit(1); } }