/**
 * Splits a fasta file by group, reading the group counts from a count file.
 *
 * The count file is loaded into a local CountTable. Every group named by the
 * table gets an (initially empty) entry in seqs and countTablePerGroup. Each
 * named sequence read from the fasta file is then recorded in allSeqsMap and
 * copied into every group in which its count is non-zero.
 *
 * @param countfile  path of the count file handed to CountTable::readTable
 * @param fastafile  path of the fasta file to read sequences from
 *
 * Any std::exception is reported through m->errorOut and the process exits.
 */
SequenceCountParser::SequenceCountParser(string countfile, string fastafile) {
    try {
        m = MothurOut::getInstance();

        //load the count table from disk
        CountTable countTable;
        countTable.readTable(countfile, true, false);

        //give every group an empty sequence list and an empty count map
        namesOfGroups = countTable.getNamesOfGroups();
        for (size_t g = 0; g < namesOfGroups.size(); g++) {
            seqs[namesOfGroups[g]] = vector<Sequence>();
            countTablePerGroup[namesOfGroups[g]] = map<string, int>();
        }

        //walk the fasta file, handing each sequence to the groups that contain it
        ifstream in;
        m->openInputFile(fastafile, in);

        int numRead = 0;
        while (!in.eof()) {

            if (m->control_pressed) { break; }

            Sequence seq(in); m->gobble(in);
            numRead++;

            //progress note every 1000 records, debug mode only
            if (m->debug && ((numRead % 1000) == 0)) {
                m->mothurOut("[DEBUG]: reading seq " + toString(numRead) + "\n.");
            }

            //records without a name are skipped
            const string seqName = seq.getName();
            if (seqName == "") { continue; }

            allSeqsMap[seqName] = seqName;

            //counts are indexed in the same order as namesOfGroups
            vector<int> groupCounts = countTable.getGroupCounts(seqName);
            for (size_t g = 0; g < namesOfGroups.size(); g++) {
                if (groupCounts[g] == 0) { continue; }

                seqs[namesOfGroups[g]].push_back(seq);
                countTablePerGroup[namesOfGroups[g]][seqName] = groupCounts[g];
            }
        }
        in.close();
    }
    catch(exception& e) {
        m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
        exit(1);
    }
}
/**
 * Splits a fasta file by group, using a count table the caller has already loaded.
 *
 * If the table carries no group information, control_pressed is set, an error
 * is printed and nothing is read. Otherwise every group named by the table gets
 * an (initially empty) entry in seqs and countTablePerGroup, and each named
 * sequence read from the fasta file is recorded in allSeqsMap and copied into
 * every group in which its count is non-zero.
 *
 * @param fastafile   path of the fasta file to read sequences from
 * @param countTable  count table queried for group names and per-sequence counts
 *
 * Any std::exception is reported through m->errorOut and the process exits.
 */
SequenceCountParser::SequenceCountParser(string fastafile, CountTable& countTable) {
    try {
        m = MothurOut::getInstance();

        //nothing to split on without group columns
        if (!countTable.hasGroupInfo()) {
            m->control_pressed = true;
            m->mothurOut("[ERROR]: cannot parse fasta file by group with a count table that does not include group data, please correct.\n");
            return;
        }

        //give every group an empty sequence list and an empty count map
        namesOfGroups = countTable.getNamesOfGroups();
        for (size_t g = 0; g < namesOfGroups.size(); g++) {
            seqs[namesOfGroups[g]] = vector<Sequence>();
            countTablePerGroup[namesOfGroups[g]] = map<string, int>();
        }

        //walk the fasta file, handing each sequence to the groups that contain it
        ifstream in;
        m->openInputFile(fastafile, in);

        int numRead = 0;
        while (!in.eof()) {

            if (m->control_pressed) { break; }

            Sequence seq(in); m->gobble(in);
            numRead++;

            //progress note every 1000 records, debug mode only
            if (m->debug && ((numRead % 1000) == 0)) {
                m->mothurOut("[DEBUG]: reading seq " + toString(numRead) + "\n.");
            }

            //records without a name are skipped
            const string seqName = seq.getName();
            if (seqName == "") { continue; }

            allSeqsMap[seqName] = seqName;

            //counts are indexed in the same order as namesOfGroups
            vector<int> groupCounts = countTable.getGroupCounts(seqName);
            for (size_t g = 0; g < namesOfGroups.size(); g++) {
                if (groupCounts[g] == 0) { continue; }

                seqs[namesOfGroups[g]].push_back(seq);
                countTablePerGroup[namesOfGroups[g]][seqName] = groupCounts[g];
            }
        }
        in.close();
    }
    catch(exception& e) {
        m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
        exit(1);
    }
}
/**
 * Merges the group columns of the count file into the treatments of the design file.
 *
 * Steps, in order:
 *   1. refuse a count file without group information;
 *   2. read the count table and, when the group counts match, check that every
 *      count-table group is known to the design file;
 *   3. drop the groups the user did not select;
 *   4. for every sequence, collect its group counts per treatment, reduce each
 *      treatment with mergeAbund() and add the sequence to a new table;
 *   5. sequences whose merged total is zero are removed from the fasta file by
 *      running remove.seqs on a temporary accnos file;
 *   6. print the new table and register it as a "count" output.
 *
 * @param designMap  design file data; supplies the group names, the treatment
 *                   list and the treatment of each group
 * @return always 0; failures are signalled through m->mothurOut and
 *         m->setControl_pressed(true)
 *
 * Any std::exception is reported through m->errorOut and the process exits.
 */
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) {
            m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n");
            m->setControl_pressed(true);
            return 0;
        }

        //read countTable
        countTable.readTable(countfile, true, false);

        //fill Groups - an empty selection means every group in the table
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }

        vector<string> dnamesGroups = designMap->getNamesGroups();

        //sanity check - is every group in counttable also in designmap
        //NOTE(review): the check only runs when both files list the same number of groups;
        //a size mismatch is silently accepted - confirm that this is intended.
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) {
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
        }
        if (error) {
            m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n");
            m->setControl_pressed(true);
            return 0;
        }

        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }

        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();

        //create new table with one group per treatment
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap; //treatment -> empty list, template copied for every sequence
        for (size_t i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        //use the table's own ordering so newCounts lines up with its columns
        treatments = newTable.getNamesOfGroups();

        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (size_t i = 0; i < namesOfSeqs.size(); i++) {

            if (m->getControl_pressed()) { break; }

            //bucket this sequence's group counts by the treatment each group belongs to
            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }

            //create new counts for seq for new table
            vector<int> newCounts;
            int totalAbund = 0;
            for (size_t j = 0; j < treatments.size(); j++) {
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund); //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }

            //add seq to new table, or remember it for removal when nothing is left of it
            if (totalAbund == 0) { namesToRemove.insert(namesOfSeqs[i]); }
            else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }

        //remove sequences whose merged abundance is zero in every treatment
        if (namesToRemove.size() != 0) {
            //print names to a temporary .accnos file
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);

            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); ++it) {
                if (m->getControl_pressed()) { out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << '\n';
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;

            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);

            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();
            delete removeCommand;

            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();

            util.mothurRemove(accnosFile);
        }

        //build the output name from the count file, honouring outputDir when it is set
        string thisOutputDir = outputDir;
        if (outputDir == "") { thisOutputDir += util.hasPath(countfile); }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);

        newTable.printTable(outputFileName);

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}