void RCorrFuncZ::writeCorrelate() { std::ofstream ofs(getOutputFileName().c_str()); if (ofs.is_open()) { ofs << "#" << getCorrFuncType() << "\n"; ofs << "#selection script1: \"" << selectionScript1_ ; ofs << "\"\tselection script2: \"" << selectionScript2_ << "\"\n"; ofs << "#extra information: " << extra_ << "\n"; ofs << "#time\tcorrVal\n"; for (int i = 0; i < nTimeBins_; ++i) { ofs << times_[i] - times_[0]; for (int j = 0; j < nZBins_; ++j) { ofs << "\t" << histograms_[i][j]; } ofs << "\n"; } } else { sprintf(painCave.errMsg, "RCorrFuncZ::writeCorrelate Error: fail to open %s\n", getOutputFileName().c_str()); painCave.isFatal = 1; simError(); } ofs.close(); }
void BondAngleDistribution::writeBondAngleDistribution() { RealType norm = (RealType)nTotBonds_*((RealType)nTotBonds_-1.0)/2.0; std::ofstream ofs(getOutputFileName().c_str()); if (ofs.is_open()) { Revision r; ofs << "# " << getAnalysisType() << "\n"; ofs << "# OpenMD " << r.getFullRevision() << "\n"; ofs << "# " << r.getBuildDate() << "\n"; ofs << "# selection script: \"" << selectionScript_ << "\"\n"; if (!paramString_.empty()) ofs << "# parameters: " << paramString_ << "\n"; // Normalize by number of frames and write it out: for (int i = 0; i < nBins_; ++i) { RealType Thetaval = i * deltaTheta_; ofs << Thetaval; ofs << "\t" << (RealType)histogram_[i]/norm/frameCounter_; ofs << "\n"; } ofs.close(); } else { sprintf(painCave.errMsg, "BondAngleDistribution: unable to open %s\n", (getOutputFileName() + "q").c_str()); painCave.isFatal = 1; simError(); } }
void BondAngleDistribution::writeBondAngleDistribution() { std::ofstream osbad(getOutputFileName().c_str()); RealType norm = (RealType)nTotBonds_*((RealType)nTotBonds_-1.0)/2.0; if (osbad.is_open()) { // Normalize by number of frames and write it out: for (int i = 0; i < nBins_; ++i) { RealType Thetaval = i * deltaTheta_; osbad << Thetaval; osbad << "\t" << (RealType)histogram_[i]/norm/frameCounter_; osbad << "\n"; } osbad.close(); } else { sprintf(painCave.errMsg, "BondAngleDistribution: unable to open %s\n", (getOutputFileName() + "q").c_str()); painCave.isFatal = 1; simError(); } }
/**
 * Writes the momentum correlation tensor to the output file.
 *
 * After two '#'-prefixed header lines, each row contains the time value
 * followed by the nine tensor components in row-major order
 * (xx, xy, xz, yx, yy, yz, zx, zy, zz). Every field, including the last,
 * is followed by a tab.
 *
 * If the file cannot be opened, the message is placed in painCave.errMsg,
 * painCave.isFatal is set and simError() is called.
 */
void MomentumCorrFunc::writeCorrelate() {
  std::ofstream out(getOutputFileName().c_str());

  if (!out.is_open()) {
    sprintf(painCave.errMsg,
            "MomentumCorrFunc::writeCorrelate Error: fail to open %s\n",
            getOutputFileName().c_str());
    painCave.isFatal = 1;
    simError();
  } else {
    out << "#" << getCorrFuncType() << "\n";
    out << "#time\tcorrTensor\txx\txy\txz\tyx\tyy\tyz\tzx\tzy\tzz\n";

    for (int bin = 0; bin < nTimeBins_; ++bin) {
      out << time_[bin] << "\t";
      // Row-major walk over the 3x3 tensor; the trailing tab is kept.
      for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 3; ++col) {
          out << histogram_[bin](row, col) << "\t";
        }
      }
      out << "\n";
    }
  }

  out.close();
}
void HBondGeometric::writeHistogram() { std::ofstream osq(getOutputFileName().c_str()); if (osq.is_open()) { osq << "# HydrogenBonding Statistics\n"; osq << "# selection1: (" << selectionScript1_ << ")" << "\tselection2: (" << selectionScript2_ << ")\n"; osq << "# molecules in selection1: " << nSelected_ << "\n"; osq << "# nHBonds\tnAcceptor\tnDonor\tp(nHBonds)\tp(nAcceptor)\tp(nDonor)\n"; // Normalize by number of frames and write it out: for (int i = 0; i < nBins_; ++i) { osq << i; osq << "\t" << nHBonds_[i]; osq << "\t" << nAcceptor_[i]; osq << "\t" << nDonor_[i]; osq << "\t" << (RealType) (nHBonds_[i]) / nSelected_; osq << "\t" << (RealType) (nAcceptor_[i]) / nSelected_; osq << "\t" << (RealType) (nDonor_[i]) / nSelected_; osq << "\n"; } osq.close(); } else { sprintf(painCave.errMsg, "HBondGeometric: unable to open %s\n", (getOutputFileName() + "q").c_str()); painCave.isFatal = 1; simError(); } }
// AlignCommand::execute: builds the template AlignmentDB, aligns the sequences in fastafile via
// createProcesses(), and registers the resulting fasta / alignreport / (when not blank) accnos
// output files. The first fasta output is then set as the current fasta file.
// Returns 0 on normal completion, when help was requested, or when control was pressed (in which
// case outputTypes is cleared and, after alignment, the three output files are removed);
// returns 2 when the command was aborted without a help request.
// Any std::exception is reported through m->errorOut() and the process exits with status 1.
// numFlipped[0] is reported as the number of reversed sequences and numFlipped[1] as the number
// whose alignment eliminated too many bases.
// NOTE(review): the string literal in the "!flip" branch is split across a physical line break in
// this listing -- confirm against the upstream file that it is a single-line literal.
int AlignCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, util.getRandomNumber(), true); if (m->getControl_pressed()) { outputTypes.clear(); return 0; } time_t start = time(NULL); m->mothurOut("\nAligning sequences from " + fastafile + " ...\n" ); if (outputDir == "") { outputDir += util.hasPath(fastafile); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile)); string alignFileName = getOutputFileName("fasta", variables); string reportFileName = getOutputFileName("alignreport", variables); string accnosFileName = getOutputFileName("accnos", variables); bool hasAccnos = true; vector<long long> numFlipped; numFlipped.push_back(0); //numflipped because reverse was better numFlipped.push_back(0); //total number of sequences with over 50% of bases removed long long numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, fastafile, numFlipped); if (m->getControl_pressed()) { util.mothurRemove(accnosFileName); util.mothurRemove(alignFileName); util.mothurRemove(reportFileName); outputTypes.clear(); return 0; } //delete accnos file if its blank else report to user if (util.isBlank(accnosFileName)) { util.mothurRemove(accnosFileName); hasAccnos = false; } else { m->mothurOut("[WARNING]: " + toString(numFlipped[1]) + " of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well. 
flip=t"); }else{ m->mothurOut("\n[NOTE]: " + toString(numFlipped[0]) + " of your sequences were reversed to produce a better alignment."); } m->mothurOutEndLine(); } outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName); outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName); if (hasAccnos) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); } m->mothurOut("\nIt took " + toString(time(NULL) - start) + " seconds to align " + toString(numFastaSeqs) + " sequences.\n"); //set align file as new current fastafile string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignCommand", "execute"); exit(1); } }
void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) { try { int rank = name_list.size(); double dsum = 0.0000; for(int i=0; i<rank; i++) { dsum += d[i]; for(int j=0; j<rank; j++) { if(d[j] >= 0) { G[i][j] *= pow(d[j],0.5); } else { G[i][j] = 0.00000; } } } ofstream pcaData; map<string, string> variables; variables["[filename]"] = fnameRoot; string pcoaDataFile = getOutputFileName("pcoa",variables); m->openOutputFile(pcoaDataFile, pcaData); pcaData.setf(ios::fixed, ios::floatfield); pcaData.setf(ios::showpoint); outputNames.push_back(pcoaDataFile); outputTypes["pcoa"].push_back(pcoaDataFile); ofstream pcaLoadings; string loadingsFile = getOutputFileName("loadings",variables); m->openOutputFile(loadingsFile, pcaLoadings); pcaLoadings.setf(ios::fixed, ios::floatfield); pcaLoadings.setf(ios::showpoint); outputNames.push_back(loadingsFile); outputTypes["loadings"].push_back(loadingsFile); pcaLoadings << "axis\tloading\n"; for(int i=0; i<rank; i++) { pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl; } pcaData << "group"; for(int i=0; i<rank; i++) { pcaData << '\t' << "axis" << i+1; } pcaData << endl; for(int i=0; i<rank; i++) { pcaData << name_list[i]; for(int j=0; j<rank; j++) { pcaData << '\t' << G[i][j]; } pcaData << endl; } } catch(exception& e) { m->errorOut(e, "PCOACommand", "output"); exit(1); } }
//********************************************************************************************************************** int SffMultipleCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } vector<string> sffFiles, oligosFiles; readFile(sffFiles, oligosFiles); string thisOutputDir = outputDir; if (thisOutputDir == "") { thisOutputDir = m->hasPath(filename); } string fileroot = thisOutputDir + m->getRootName(m->getSimpleName(filename)); map<string, string> variables; variables["[filename]"] = fileroot; string fasta = getOutputFileName("fasta",variables); string name = getOutputFileName("name",variables); string group = getOutputFileName("group",variables); if (m->control_pressed) { return 0; } if (sffFiles.size() < processors) { processors = sffFiles.size(); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else //trim.flows, shhh.flows cannot handle multiple processors for windows. processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n"); #endif if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); } else { createProcesses(sffFiles, oligosFiles, fasta, name, group); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (append) { outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta); m->setFastaFile(fasta); outputNames.push_back(name); outputTypes["name"].push_back(name); m->setNameFile(name); if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); } } m->setProcessors(toString(processors)); //report output filenames m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } 
catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int SplitGroupCommand::runNameGroup(){ try { SequenceParser* parser; if (namefile == "") { parser = new SequenceParser(groupfile, fastafile); } else { parser = new SequenceParser(groupfile, fastafile, namefile); } if (m->control_pressed) { delete parser; return 0; } vector<string> namesGroups = parser->getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); string fastafileRoot = outputDir + m->getRootName(m->getSimpleName(fastafile)); string namefileRoot = outputDir + m->getRootName(m->getSimpleName(namefile)); m->mothurOutEndLine(); for (int i = 0; i < Groups.size(); i++) { m->mothurOut("Processing group: " + Groups[i]); m->mothurOutEndLine(); map<string, string> variables; variables["[filename]"] = fastafileRoot; variables["[group]"] = Groups[i]; string newFasta = getOutputFileName("fasta",variables); variables["[filename]"] = namefileRoot; string newName = getOutputFileName("name",variables); parser->getSeqs(Groups[i], newFasta, "/ab=", "/", false); outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta); if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (namefile != "") { parser->getNameMap(Groups[i], newName); outputNames.push_back(newName); outputTypes["name"].push_back(newName); } if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } } delete parser; return 0; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "runNameGroup"); exit(1); } }
//********************************************************************************************************************** string CatchAllCommand::process(SAbundVector* sabund, string file1) { try { map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(file1)); variables["[distance]"] = sabund->getLabel(); string filename = getOutputFileName("sabund", variables); filename = m->getFullPathName(filename); ofstream out; m->openOutputFile(filename, out); if (m->debug) { m->mothurOut("[DEBUG]: Creating " + filename + " file for catchall, shown below.\n\n"); } for (int i = 1; i <= sabund->getMaxRank(); i++) { int temp = sabund->get(i); if (temp != 0) { out << i << "," << temp << endl; if (m->debug) { m->mothurOut(toString(i) + "," + toString(temp) + "\n"); } } } out.close(); if (m->debug) { m->mothurOut("[DEBUG]: Done creating " + filename + " file for catchall, shown above.\n\n"); } return filename; } catch(exception& e) { m->errorOut(e, "CatchAllCommand", "process"); exit(1); } }
// Builds an SvmDataset from the shared abundance vectors and the design map, trains a
// OneVsOneMultiClassSvmTrainer over the default kernel parameter ranges, and registers a
// "summary" output file name derived from sharedfile and the label of lookup[0].
// Progress markers are printed to std::cout after training and before returning.
// Any std::exception is reported through m->errorOut() and the process exits with status 1.
void ClassifySvmSharedCommand::trainSharedAndDesignData(vector<SharedRAbundVector*> lookup) {
    try {
        // Convert the mothur data into labeled observations and features.
        LabeledObservationVector observations;
        FeatureVector features;
        readSharedRAbundVectors(lookup, designMap, observations, features);
        SvmDataset dataset(observations, features);

        int evaluationFoldCount = 3;
        int trainFoldCount = 5;
        OutputFilter outputFilter(2);
        OneVsOneMultiClassSvmTrainer trainer(dataset, evaluationFoldCount, trainFoldCount, *this, outputFilter);

        KernelParameterRangeMap parameterRanges;
        getDefaultKernelParameterRangeMap(parameterRanges);
        trainer.train(parameterRanges);

        std::cout << "done training" << std::endl;

        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        string summaryName = getOutputFileName("summary", variables);
        outputNames.push_back(summaryName);
        outputTypes["summary"].push_back(summaryName);

        m->mothurOutEndLine();
        std::cout << "leaving processSharedAndDesignData" << std::endl;
    }
    catch (exception& e) {
        m->errorOut(e, "ClassifySvmSharedCommand", "trainSharedAndDesignData");
        exit(1);
    }
}
/// Resolves where the output for a given start number can be obtained.
/// @param i_startcount  Start number requested; 0 selects the running task's output when there is
///                      one (m_run), otherwise the most recent start.
/// @param i_renders     Passed through to m_run->v_getOutput().
/// @param o_filename    Set to getOutputFileName() for the resolved start number when the output
///                      is not taken from m_run.
/// @param o_error       Receives an error description when the task was never started or was
///                      started fewer times than requested.
/// @return The message from m_run->v_getOutput() when i_startcount is 0 and m_run is set;
///         NULL in every other case.
af::Msg * Task::getOutput( int i_startcount, RenderContainer * i_renders, std::string & o_filename, std::string & o_error) const
{
	if( m_progress->starts_count < 1 )
	{
		o_error = "Task is not started.";
		return NULL;
	}

	if( i_startcount > m_progress->starts_count )
	{
		o_error += "Task was started "+af::itos(m_progress->starts_count)+" times ( less than "+af::itos(i_startcount)+" times ).";
		return NULL;
	}

	// Zero with a running task: ask that task directly.
	if(( i_startcount == 0 ) && m_run )
		return m_run->v_getOutput( i_startcount, i_renders, o_error);

	// Zero without a running task: fall back to the latest start.
	if( i_startcount == 0 )
		i_startcount = m_progress->starts_count;

	o_filename = getOutputFileName( i_startcount);
	return NULL;
}
//********************************************************************************************************************** int GetSeqsCommand::readTax(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)); variables["[extension]"] = m->getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(taxfile, in); string name, tax; bool wroteSomething = false; int selectedCount = 0; if (m->debug) { set<string> temp; sanity["tax"] = temp; } while(!in.eof()){ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } in >> name; //read from first column in >> tax; //read from second column if (!dups) {//adjust name if needed map<string, string>::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } //if this name is in the accnos file if (names.count(name) != 0) { wroteSomething = true; out << name << '\t' << tax << endl; selectedCount++; if (m->debug) { sanity["tax"].insert(name); } } m->gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readTax"); exit(1); } }
//********************************************************************************************************************** int GetSeqsCommand::readCount(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); variables["[extension]"] = m->getExtension(countfile); string outputFileName = getOutputFileName("count", variables); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(countfile, in); bool wroteSomething = false; int selectedCount = 0; string headers = m->getline(in); m->gobble(in); out << headers << endl; string test = headers; vector<string> pieces = m->splitWhiteSpace(test); string name, rest; int thisTotal; rest = ""; while (!in.eof()) { if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } in >> name; m->gobble(in); in >> thisTotal; m->gobble(in); if (pieces.size() > 2) { rest = m->getline(in); m->gobble(in); } if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); } if (names.count(name) != 0) { out << name << '\t' << thisTotal << '\t' << rest << endl; wroteSomething = true; selectedCount+= thisTotal; } } in.close(); out.close(); //check for groups that have been eliminated CountTable ct; if (ct.testGroups(outputFileName)) { ct.readTable(outputFileName, true, false); ct.printTable(outputFileName); } if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readCount"); exit(1); } }
bool OutputFile::openFile(std::string* fileName, const std::string msg, const std::string* path, const std::string ext, const std::string fileType) { if (fileName->empty()) { *fileName = getOutputFileName(msg, *path, ext); if(fileName->empty()) { return false; } } file.reset(new std::ofstream(fileName->c_str(), std::ofstream::trunc)); if(!file->is_open()) { utilityObject->error("Cannot open output file [%s]\n", fileName->c_str()); return false; } name = *fileName; typeOfFileMsg = fileType; return true; }
//********************************************************************************************************************** int RemoveGroupsCommand::readFasta(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)); variables["[extension]"] = m->getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(fastafile, in); string name; bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { //if this name is in the accnos file if (names.count(name) == 0) { wroteSomething = true; currSeq.printSequence(out); }else { //if you are not in the accnos file check if you are a name that needs to be changed map<string, string>::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { wroteSomething = true; currSeq.setName(it->second); currSeq.printSequence(out); }else { removedCount++; } } } m->gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); } outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readFasta"); exit(1); } }
// Rewrites groupfile as a new "group" output file in which every sequence belonging to one of the
// requested Groups is assigned the grouping that designMap gives for its original group.
// A sequence whose group has no entry in the design map (designMap->get() returns "not found")
// is reported as an error; after the loop m->control_pressed is set if any such error occurred.
// The output file is registered in outputNames / outputTypes before it is written.
// Returns 0; any std::exception is reported through m->errorOut() and the process exits with 1.
int MergeGroupsCommand::processGroupFile(DesignMap*& designMap){
    try {
        string thisOutputDir = outputDir;
        if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }

        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
        string outputFileName = getOutputFileName("group", variables);
        outputTypes["group"].push_back(outputFileName);
        outputNames.push_back(outputFileName);

        ofstream out;
        m->openOutputFile(outputFileName, out);

        //read groupfile
        GroupMap groupMap(groupfile);
        groupMap.readMap();

        //fill Groups - checks for "all" and for any typo groups
        // The helper lives on the stack so it is released even if setGroups throws;
        // the previous new/delete pair leaked it in that case.
        vector<string> nameGroups = groupMap.getNamesOfGroups();
        SharedUtil util;
        util.setGroups(Groups, nameGroups);

        vector<string> namesOfSeqs = groupMap.getNamesSeqs();
        bool error = false;

        for (int i = 0; i < namesOfSeqs.size(); i++) {
            if (m->control_pressed) { break; }

            string thisGroup = groupMap.getGroup(namesOfSeqs[i]);

            //are you in a group the user wants
            if (m->inUsersGroups(thisGroup, Groups)) {
                string thisGrouping = designMap->get(thisGroup);

                if (thisGrouping == "not found") {
                    m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct.");
                    m->mothurOutEndLine();
                    error = true;
                } else {
                    out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
                }
            }
        }

        if (error) { m->control_pressed = true; }

        out.close();

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
        exit(1);
    }
}
int ListSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read functions fill names vector if (fastafile != "") { inputFileName = fastafile; readFasta(); } else if (fastqfile != "") { inputFileName = fastqfile; readFastq(); } else if (namefile != "") { inputFileName = namefile; readName(); } else if (groupfile != "") { inputFileName = groupfile; readGroup(); } else if (alignfile != "") { inputFileName = alignfile; readAlign(); } else if (listfile != "") { inputFileName = listfile; readList(); } else if (taxfile != "") { inputFileName = taxfile; readTax(); } else if (countfile != "") { inputFileName = countfile; readCount(); } if (m->getControl_pressed()) { outputTypes.clear(); return 0; } //sort in alphabetical order sort(names.begin(), names.end()); if (outputDir == "") { outputDir += util.hasPath(inputFileName); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputFileName)); string outputFileName = getOutputFileName("accnos", variables); util.printAccnos(outputFileName, names); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFileName); return 0; } current->setAccnosFile(outputFileName); m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "execute"); exit(1); } }
// Runs a Kruskal-Wallis test (LinearAlgebra::calcKruskalWallis) for every bin, grouping the
// abundances in lookup by the value designMap gives each group in category mclass.
// Each bin's label, H statistic and p-value are written to a "kruskall-wallis" output file.
// Returns the indices of the bins whose p-value is below anovaAlpha.
// When fewer than two distinct treatments are present, an error is printed and
// m->control_pressed is set, which stops the per-bin loop before any bin is processed.
// Any std::exception is reported through m->errorOut() and the process exits with status 1.
vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
    try {
        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        string outputFileName = getOutputFileName("kruskall-wallis",variables);

        ofstream out;
        m->openOutputFile(outputFileName, out);
        outputNames.push_back(outputFileName);
        outputTypes["kruskall-wallis"].push_back(outputFileName);
        out << "OTULabel\tKW\tPvalue\n";

        vector<int> significantOtuLabels;
        int numBins = lookup[0]->getNumBins();

        //sanity check to make sure each treatment has a group in the shared file
        set<string> treatments;
        for (int s = 0; s < lookup.size(); s++) {
            string group = lookup[s]->getGroup();
            string treatment = designMap.get(group, mclass); //get value for this group in this category
            treatments.insert(treatment);
        }
        if (treatments.size() < 2) {
            m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n");
            m->control_pressed = true;
        }

        LinearAlgebra linear;
        for (int bin = 0; bin < numBins; bin++) {
            if (m->control_pressed) { break; }

            // Pair every sample's abundance in this bin with its treatment.
            vector<spearmanRank> values;
            for (int s = 0; s < lookup.size(); s++) {
                string group = lookup[s]->getGroup();
                string treatment = designMap.get(group, mclass); //get value for this group in this category
                values.push_back(spearmanRank(treatment, lookup[s]->getAbundance(bin)));
            }

            double pValue = 0.0;
            double H = linear.calcKruskalWallis(values, pValue);

            //output H and signifigance
            out << m->currentBinLabels[bin] << '\t' << H << '\t' << pValue << endl;

            if (pValue < anovaAlpha) { significantOtuLabels.push_back(bin); }
        }
        out.close();

        return significantOtuLabels;
    }
    catch(exception& e) {
        m->errorOut(e, "LefseCommand", "runKruskalWallis");
        exit(1);
    }
}
//********************************************************************************************************************** int SharedCommand::createSharedFromCount() { try { //getting output filename string filename = countfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out; bool printHeaders = true; util.openOutputFile(filename, out); CountTable ct; ct.readTable(countfile, true, false); map<string, string> seqNameToOtuName; SharedRAbundVectors* lookup = ct.getShared(Groups, seqNameToOtuName); lookup->setLabels(*labels.begin()); lookup->print(out, printHeaders); out.close(); delete lookup; string mapFilename = getOutputFileName("map",variables); outputNames.push_back(mapFilename); outputTypes["map"].push_back(mapFilename); ofstream outMap; util.openOutputFile(mapFilename, outMap); for (map<string, string>::iterator it = seqNameToOtuName.begin(); it != seqNameToOtuName.end(); it++) { outMap << it->first << '\t' << it->second << endl; } outMap.close(); return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "createSharedFromCount"); exit(1); } }
//********************************************************************************************************************** int RemoveGroupsCommand::readGroup(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); variables["[extension]"] = m->getExtension(groupfile); string outputFileName = getOutputFileName("group", variables); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(groupfile, in); string name, group; bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } in >> name; //read from first column in >> group; //read from second column //if this name is in the accnos file if (names.count(name) == 0) { wroteSomething = true; out << name << '\t' << group << endl; }else { removedCount++; } m->gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); } outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readGroup"); exit(1); } }
/**
 * Writes the collected P2 order parameters to the output file.
 *
 * The file starts with '#'-prefixed header lines: a title, the selection
 * script(s) and a column legend. The second selection is only listed when
 * doVect_ is false. Each following row holds p2, the three director
 * components and the angle for one entry of orderParams_.
 */
void P2OrderParameter::writeP2() {
  ofstream os(getOutputFileName().c_str());

  os << "#radial distribution function\n";

  // The selection line is always terminated. Previously the newline was only
  // emitted together with selection2, so with doVect_ set the column legend
  // below ended up on the same line as selection1.
  os << "#selection1: (" << selectionScript1_ << ")";
  if (!doVect_) {
    os << "\tselection2: (" << selectionScript2_ << ")";
  }
  os << "\n";

  os << "#p2\tdirector_x\tdirector_y\tdiretor_z\tangle(degree)\n";

  for (size_t i = 0; i < orderParams_.size(); ++i) {
    os << orderParams_[i].p2 << "\t"
       << orderParams_[i].director[0] << "\t"
       << orderParams_[i].director[1] << "\t"
       << orderParams_[i].director[2] << "\t"
       << orderParams_[i].angle << "\n";
  }
}
//*************************************************************************************************************** int DegapSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Degapping sequences from " + fastafile + " ...\n" ); string tempOutputDir = outputDir; if (outputDir == "") { tempOutputDir = util.hasPath(fastafile); } map<string, string> variables; variables["[filename]"] = tempOutputDir + util.getRootName(util.getSimpleName(fastafile)); string degapFile = getOutputFileName("fasta", variables); outputNames.push_back(degapFile); outputTypes["fasta"].push_back(degapFile); long start = time(NULL); int numSeqs = createProcesses(fastafile, degapFile); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to degap " + toString(numSeqs) + " sequences.\n\n"); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "execute"); exit(1); } }
//********************************************************************************************************************** //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name int GetSeqsCommand::readAlign(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)); string outputFileName = getOutputFileName("alignreport", variables); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(alignfile, in); string name, junk; bool wroteSomething = false; int selectedCount = 0; //read column headers for (int i = 0; i < 16; i++) { if (!in.eof()) { in >> junk; out << junk << '\t'; } else { break; } }
//*************************************************************************************************************** int SummaryQualCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); int numSeqs = 0; vector<int> position; vector<int> averageQ; vector< vector<int> > scores; if (m->control_pressed) { return 0; } if (namefile != "") { nameMap = m->readNames(namefile); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); nameMap = ct.getNameMap(); } vector<unsigned long long> positions; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(qualfile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(linePair(0, 1000)); }else { positions = m->setFilePosFasta(qualfile, numSeqs); if (numSeqs < processors) { processors = numSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif if(processors == 1){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]); } else{ numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile); } if (m->control_pressed) { return 0; } //print summary file map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qualfile)); string summaryFile = getOutputFileName("summary",variables); printQual(summaryFile, position, averageQ, scores); if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; } //output results to screen cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); 
m->mothurOutEndLine(); m->mothurOut("Position\tNumSeqs\tAverageQ"); m->mothurOutEndLine(); for (int i = 0; i < position.size(); i+=100) { float average = averageQ[i] / (float) position[i]; cout << i << '\t' << position[i] << '\t' << average; m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)); m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(summaryFile); m->mothurOutEndLine(); outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& thisLookUp) { try { //for each combo for (int c = start; c < (start+num); c++) { //get set names string setA = namesOfGroupCombos[c][0]; string setB = namesOfGroupCombos[c][1]; //get filename map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = thisLookUp[0]->getLabel(); variables["[group]"] = setA + "-" + setB; string outputFileName = getOutputFileName("metastats",variables); outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); //int nameLength = outputFileName.length(); //char * output = new char[nameLength]; //strcpy(output, outputFileName.c_str()); //build matrix from shared rabunds //double** data; //data = new double*[thisLookUp[0]->getNumBins()]; vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins()); vector<SharedRAbundVector*> subset; int setACount = 0; int setBCount = 0; for (int i = 0; i < thisLookUp.size(); i++) { string thisGroup = thisLookUp[i]->getGroup(); //is this group for a set we want to compare?? //sorting the sets by putting setB at the back and setA in the front if ((designMap->getGroup(thisGroup) == setB)) { subset.push_back(thisLookUp[i]); setBCount++; }else if ((designMap->getGroup(thisGroup) == setA)) { subset.insert(subset.begin()+setACount, thisLookUp[i]); setACount++; } } if ((setACount == 0) || (setBCount == 0)) { m->mothurOut("Missing shared info for " + setA + " or " + setB + ". 
Skipping comparison."); m->mothurOutEndLine(); outputNames.pop_back(); }else { //fill data for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { //data[j] = new double[subset.size()]; data2[j].resize(subset.size(), 0.0); for (int i = 0; i < subset.size(); i++) { data2[j][i] = (subset[i]->getAbundance(j)); } } m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); if (convertSharedToInput) { convertToInput(subset, outputFileName); } m->mothurOutEndLine(); MothurMetastats mothurMeta(threshold, iters); mothurMeta.runMetastats(outputFileName , data2, setACount); m->mothurOutEndLine(); m->mothurOutEndLine(); } //free memory //delete output; //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; } //delete[] data; } return 0; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "driver"); exit(1); } }
int RareFactCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } map<string, set<int> > labelToEnds; if ((format != "sharedfile")) { inputFileNames.push_back(inputfile); } else { inputFileNames = parseSharedFile(sharedfile, labelToEnds); format = "rabund"; } if (m->control_pressed) { return 0; } map<int, string> file2Group; //index in outputNames[i] -> group for (int p = 0; p < inputFileNames.size(); p++) { string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); } int i; ValidCalculators validCalculator; map<string, string> variables; variables["[filename]"] = fileNameRoot; for (i=0; i<Estimators.size(); i++) { if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { if (Estimators[i] == "sobs") { rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(getOutputFileName("rarefaction",variables)))); outputNames.push_back(getOutputFileName("rarefaction",variables)); outputTypes["rarefaction"].push_back(getOutputFileName("rarefaction",variables)); }else if (Estimators[i] == "chao") { rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(getOutputFileName("r_chao",variables)))); outputNames.push_back(getOutputFileName("r_chao",variables)); outputTypes["r_chao"].push_back(getOutputFileName("r_chao",variables)); }else if (Estimators[i] == "ace") { if(abund < 5) abund = 10; rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(getOutputFileName("r_ace",variables)))); outputNames.push_back(getOutputFileName("r_ace",variables)); outputTypes["r_ace"].push_back(getOutputFileName("r_ace",variables)); }else if (Estimators[i] == "jack") { 
rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(getOutputFileName("r_jack",variables)))); outputNames.push_back(getOutputFileName("r_jack",variables)); outputTypes["r_jack"].push_back(getOutputFileName("r_jack",variables)); }else if (Estimators[i] == "shannon") { rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(getOutputFileName("r_shannon",variables)))); outputNames.push_back(getOutputFileName("r_shannon",variables)); outputTypes["r_shannon"].push_back(getOutputFileName("r_shannon",variables)); }else if (Estimators[i] == "shannoneven") { rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(getOutputFileName("r_shannoneven",variables)))); outputNames.push_back(getOutputFileName("r_shannoneven",variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannoneven",variables)); }else if (Estimators[i] == "heip") { rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(getOutputFileName("r_heip",variables)))); outputNames.push_back(getOutputFileName("r_heip",variables)); outputTypes["r_heip"].push_back(getOutputFileName("r_heip",variables)); }else if (Estimators[i] == "r_shannonrange") { rDisplays.push_back(new RareDisplay(new RangeShannon(alpha), new ThreeColumnFile(getOutputFileName("r_shannonrange", variables)))); outputNames.push_back(getOutputFileName("r_shannonrange", variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannonrange", variables)); }else if (Estimators[i] == "smithwilson") { rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(getOutputFileName("r_smithwilson",variables)))); outputNames.push_back(getOutputFileName("r_smithwilson",variables)); outputTypes["r_smithwilson"].push_back(getOutputFileName("r_smithwilson",variables)); }else if (Estimators[i] == "npshannon") { rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(getOutputFileName("r_npshannon",variables)))); 
outputNames.push_back(getOutputFileName("r_npshannon",variables)); outputTypes["r_npshannon"].push_back(getOutputFileName("r_npshannon",variables)); }else if (Estimators[i] == "simpson") { rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(getOutputFileName("r_simpson",variables)))); outputNames.push_back(getOutputFileName("r_simpson",variables)); outputTypes["r_simpson"].push_back(getOutputFileName("r_simpson",variables)); }else if (Estimators[i] == "simpsoneven") { rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(getOutputFileName("r_simpsoneven",variables)))); outputNames.push_back(getOutputFileName("r_simpsoneven",variables)); outputTypes["r_simpsoneven"].push_back(getOutputFileName("r_simpsoneven",variables)); }else if (Estimators[i] == "invsimpson") { rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(getOutputFileName("r_invsimpson",variables)))); outputNames.push_back(getOutputFileName("r_invsimpson",variables)); outputTypes["r_invsimpson"].push_back(getOutputFileName("r_invsimpson",variables)); }else if (Estimators[i] == "bootstrap") { rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(getOutputFileName("r_bootstrap",variables)))); outputNames.push_back(getOutputFileName("r_bootstrap",variables)); outputTypes["r_bootstrap"].push_back(getOutputFileName("r_bootstrap",variables)); }else if (Estimators[i] == "coverage") { rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(getOutputFileName("r_coverage",variables)))); outputNames.push_back(getOutputFileName("r_coverage",variables)); outputTypes["r_coverage"].push_back(getOutputFileName("r_coverage",variables)); }else if (Estimators[i] == "nseqs") { rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(getOutputFileName("r_nseqs",variables)))); outputNames.push_back(getOutputFileName("r_nseqs",variables)); outputTypes["r_nseqs"].push_back(getOutputFileName("r_nseqs",variables)); } if 
(inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; } } } //if the users entered no valid calculators don't execute command if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; } return 0; } input = new InputData(inputFileNames[p], format); order = input->getOrderVector(); string lastLabel = order->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; } delete input; delete order; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //as long as you are not at the end of the file or done wih the lines you want while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; } delete input; delete order; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(order->getLabel()) == 1){ m->mothurOut(order->getLabel()); m->mothurOutEndLine(); map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel()); set<int> ends; if (itEndings != labelToEnds.end()) { ends = itEndings->second; } rCurve = new Rarefact(order, rDisplays, processors, ends); rCurve->getCurve(freq, nIters); delete rCurve; processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); } if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = order->getLabel(); delete order; order = (input->getOrderVector(lastLabel)); m->mothurOut(order->getLabel()); m->mothurOutEndLine(); map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel()); set<int> ends; if (itEndings != labelToEnds.end()) { ends = itEndings->second; } 
rCurve = new Rarefact(order, rDisplays, processors, ends); rCurve->getCurve(freq, nIters); delete rCurve; processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); //restore real lastlabel to save below order->setLabel(saveLabel); } lastLabel = order->getLabel(); delete order; order = (input->getOrderVector()); } if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; } delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; } delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //run last label if you need to if (needToRun == true) { if (order != NULL) { delete order; } order = (input->getOrderVector(lastLabel)); m->mothurOut(order->getLabel()); m->mothurOutEndLine(); map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel()); set<int> ends; if (itEndings != labelToEnds.end()) { ends = itEndings->second; } rCurve = new Rarefact(order, rDisplays, processors, ends); rCurve->getCurve(freq, nIters); delete rCurve; delete order; } for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; } rDisplays.clear(); delete input; } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //create summary file containing all the groups data for each label - this function just combines the info from the files 
already created. if ((sharedfile != "") && (groupMode)) { outputNames = createGroupFile(outputNames, file2Group); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int ClassifyOtuCommand::process(ListVector* processList) { try{ string conTax; int size; //create output file if (outputDir == "") { outputDir += m->hasPath(listfile); } ofstream out; map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile)); variables["[distance]"] = processList->getLabel(); string outputFile = getOutputFileName("constaxonomy", variables); m->openOutputFile(outputFile, out); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream outSum; string outputSumFile = getOutputFileName("taxsummary", variables); m->openOutputFile(outputSumFile, outSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); out << "OTU\tSize\tTaxonomy" << endl; PhyloSummary* taxaSum; if (countfile != "") { if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); } else { taxaSum = new PhyloSummary(ct); } }else { if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); } else { taxaSum = new PhyloSummary(groupMap); } } vector<ofstream*> outSums; vector<ofstream*> outs; vector<PhyloSummary*> taxaSums; map<string, int> groupIndex; if (persample) { for (int i = 0; i < groups.size(); i++) { groupIndex[groups[i]] = i; ofstream* temp = new ofstream(); variables["[distance]"] = processList->getLabel() + "." 
+ groups[i]; string outputFile = getOutputFileName("constaxonomy", variables); m->openOutputFile(outputFile, *temp); (*temp) << "OTU\tSize\tTaxonomy" << endl; outs.push_back(temp); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream* tempSum = new ofstream(); string outputSumFile = getOutputFileName("taxsummary", variables); m->openOutputFile(outputSumFile, *tempSum); outSums.push_back(tempSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); PhyloSummary* taxaSumt; if (countfile != "") { if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct); } else { taxaSumt = new PhyloSummary(ct); } }else { if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap); } else { taxaSumt = new PhyloSummary(groupMap); } } taxaSums.push_back(taxaSumt); } } //for each bin in the list vector string snumBins = toString(processList->getNumBins()); for (int i = 0; i < processList->getNumBins(); i++) { if (m->control_pressed) { break; } vector<string> names; string binnames = processList->get(i); vector<string> thisNames; m->splitAtComma(binnames, thisNames); names = findConsensusTaxonomy(thisNames, size, conTax); if (m->control_pressed) { break; } //output to new names file string binLabel = "Otu"; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; out << binLabel << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax); //add this bins taxonomy to summary if (basis == "sequence") { for(int j = 0; j < names.size(); j++) { int numReps = 1; if (countfile != "") { numReps = ct->getNumSeqs(names[j]); } for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); } } }else { //otu map<string, bool> containsGroup; if (countfile != 
"") { if (ct->hasGroupInfo()) { vector<string> mGroups = ct->getNamesOfGroups(); for (int k = 0; k < names.size(); k++) { vector<int> counts = ct->getGroupCounts(names[k]); for (int h = 0; h < counts.size(); h++) { if (counts[h] != 0) { containsGroup[mGroups[h]] = true; } } } } }else { if (groupfile != "") { vector<string> mGroups = groupMap->getNamesOfGroups(); for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; } for (int k = 0; k < names.size(); k++) { //find out the sequences group string group = groupMap->getGroup(names[k]); if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } else { containsGroup[group] = true; } } } } taxaSum->addSeqToTree(noConfidenceConTax, containsGroup); } if (persample) { //divide names by group map<string, vector<string> > parsedNames; map<string, vector<string> >::iterator itParsed; //parse names by group for (int j = 0; j < names.size(); j++) { if (groupfile != "") { string group = groupMap->getGroup(names[j]); itParsed = parsedNames.find(group); if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); } else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; } }else { //count file was used vector<string> thisSeqsGroups = ct->getGroups(names[j]); for (int k = 0; k < thisSeqsGroups.size(); k++) { string group = thisSeqsGroups[k]; itParsed = parsedNames.find(group); if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); } else { vector<string> tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; } } } } for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) { vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax); if (m->control_pressed) { break; } //output to new names file string binLabel = "Otu"; string sbinNumber = 
toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax); //add this bins taxonomy to summary if (basis == "sequence") { for(int j = 0; j < theseNames.size(); j++) { int numReps = 1; if (countfile != "") { numReps = ct->getGroupCount(theseNames[j], itParsed->first); } //get num seqs for this seq from this group for(int k = 0; k < numReps; k++) { (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(theseNames[j], noConfidenceConTax); } } }else { //otu map<string, bool> containsGroup; containsGroup[itParsed->first] = true; (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(noConfidenceConTax, containsGroup); } } } } out.close(); //print summary file taxaSum->print(outSum); outSum.close(); if (persample) { for (int i = 0; i < groups.size(); i++) { (*outs[i]).close(); taxaSums[i]->print(*outSums[i]); (*outSums[i]).close(); delete outs[i]; delete outSums[i]; delete taxaSums[i]; } } delete taxaSum; return 0; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "process"); exit(1); } }
int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){ try { vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files vector< vector<seqDist> > calcDists; calcDists.resize(matrixCalculators.size()); for (int thisIter = 0; thisIter < iters+1; thisIter++) { map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = thisLookup[0]->getLabel(); variables["[tag2]"] = ""; vector<SharedRAbundVector*> thisItersLookup = thisLookup; if (subsample && (thisIter != 0)) { SubSample sample; vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds //make copy of lookup so we don't get access violations vector<SharedRAbundVector*> newLookup; for (int k = 0; k < thisItersLookup.size(); k++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(thisItersLookup[k]->getLabel()); temp->setGroup(thisItersLookup[k]->getGroup()); newLookup.push_back(temp); } //for each bin for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } } tempLabels = sample.getSample(newLookup, subsampleSize); thisItersLookup = newLookup; } if(processors == 1){ driver(thisItersLookup, 0, numGroups, calcDists); }else{ int process = 1; vector<int> processIDS; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); process++; }else if (pid == 0){ driver(thisItersLookup, lines[process].start, lines[process].end, calcDists); string 
tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist"; ofstream outtemp; m->openOutputFile(tempdistFileName, outtemp); for (int i = 0; i < calcDists.size(); i++) { outtemp << calcDists[i].size() << endl; for (int j = 0; j < calcDists[i].size(); j++) { outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; } } outtemp.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } //parent do your part driver(thisItersLookup, lines[0].start, lines[0].end, calcDists); //force parent to wait until all the processes are done for (int i = 0; i < processIDS.size(); i++) { int temp = processIDS[i]; wait(&temp); } for (int i = 0; i < processIDS.size(); i++) { string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) + ".dist"; ifstream intemp; m->openInputFile(tempdistFileName, intemp); for (int k = 0; k < calcDists.size(); k++) { int size = 0; intemp >> size; m->gobble(intemp); for (int j = 0; j < size; j++) { int seq1 = 0; int seq2 = 0; float dist = 1.0; intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); seqDist tempDist(seq1, seq2, dist); calcDists[k].push_back(tempDist); } } intemp.close(); m->mothurRemove(tempdistFileName); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the distSharedData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, //Taking advantage of shared memory to pass results vectors. 
////////////////////////////////////////////////////////////////////////////////////////////////////// vector<distSharedData*> pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; //Create processor worker threads. for( int i=1; i<processors; i++ ){ //make copy of lookup so we don't get access violations vector<SharedRAbundVector*> newLookup; for (int k = 0; k < thisItersLookup.size(); k++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(thisItersLookup[k]->getLabel()); temp->setGroup(thisItersLookup[k]->getGroup()); newLookup.push_back(temp); } //for each bin for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } } // Allocate memory for thread data. distSharedData* tempSum = new distSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup); pDataArray.push_back(tempSum); processIDS.push_back(i); hThreadArray[i-1] = CreateThread(NULL, 0, MyDistSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); } //parent do your part driver(thisItersLookup, lines[0].start, lines[0].end, calcDists); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. 
\n"); m->control_pressed = true; } for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } for (int k = 0; k < calcDists.size(); k++) { int size = pDataArray[i]->calcDists[k].size(); for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); } } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif } if (subsample && (thisIter != 0)) { if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); } calcDistsTotals.push_back(calcDists); for (int i = 0; i < calcDists.size(); i++) { for (int j = 0; j < calcDists[i].size(); j++) { if (m->debug) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n"); } } } //clean up memory for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; } thisItersLookup.clear(); }else { //print results for whole dataset for (int i = 0; i < calcDists.size(); i++) { if (m->control_pressed) { break; } //initialize matrix vector< vector<double> > matrix; //square matrix to represent the distance matrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } for (int j = 0; j < calcDists[i].size(); j++) { int row = calcDists[i][j].seq1; int column = calcDists[i][j].seq2; double dist = calcDists[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; } variables["[outputtag]"] = output; variables["[calc]"] = matrixCalculators[i]->getName(); string distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); ofstream outDist; m->openOutputFile(distFileName, outDist); outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); printSims(outDist, matrix); outDist.close(); } } for (int i = 0; i < 
calcDists.size(); i++) { calcDists[i].clear(); } } if (iters != 0) { //we need to find the average distance and standard deviation for each groups distance vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals, mode); //find standard deviation vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); //print results for (int i = 0; i < calcDists.size(); i++) { vector< vector<double> > matrix; //square matrix to represent the distance matrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } vector< vector<double> > stdmatrix; //square matrix to represent the stdDev stdmatrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); } for (int j = 0; j < calcAverages[i].size(); j++) { int row = calcAverages[i][j].seq1; int column = calcAverages[i][j].seq2; float dist = calcAverages[i][j].dist; float stdDist = stdDev[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; stdmatrix[row][column] = stdDist; stdmatrix[column][row] = stdDist; } map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = thisLookup[0]->getLabel(); variables["[outputtag]"] = output; variables["[tag2]"] = "ave"; variables["[calc]"] = matrixCalculators[i]->getName(); string distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); //set current phylip file to average distance matrix m->setPhylipFile(distFileName); ofstream outAve; m->openOutputFile(distFileName, outAve); outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); printSims(outAve, matrix); outAve.close(); variables["[tag2]"] = "std"; distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); 
ofstream outSTD; m->openOutputFile(distFileName, outSTD); outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint); printSims(outSTD, stdmatrix); outSTD.close(); } } return 0; } catch(exception& e) { m->errorOut(e, "MatrixOutputCommand", "process"); exit(1); } }
/**
 * Builds the make.group command from its option string.
 *
 * "help" and "citation" print their text and set abort and calledHelp. Any other
 * option string is parsed and checked against setParameters(); then the fasta file
 * list, the output file name (filename) and the group names are filled in.
 * abort is set whenever a parameter is invalid, no usable fasta file remains,
 * the groups parameter is missing, or the number of groups differs from the number
 * of fasta files.
 *
 * @param option  the option string given to the command, or "help" / "citation".
 *
 * Any std::exception is reported through m->errorOut and the program exits with status 1.
 */
MakeGroupCommand::MakeGroupCommand(string option)  {
	try {
		abort = false; calledHelp = false;

		//allow user to run help
		if(option == "help") { help(); abort = true; calledHelp = true; }
		else if(option == "citation") { citation(); abort = true; calledHelp = true;}
		else {
			vector<string> myArray = setParameters();

			OptionParser parser(option);
			map<string, string> parameters = parser.getParameters();

			ValidParameters validParameter;
			map<string, string>::iterator it;

			//check to make sure all parameters are valid for command
			for (it = parameters.begin(); it != parameters.end(); it++) {
				if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
			}

			//initialize outputTypes
			vector<string> tempOutNames;
			outputTypes["group"] = tempOutNames;

			//if the user changes the output directory command factory will send this info to us in the output parameter
			outputDir = validParameter.validFile(parameters, "outputdir", false);
			if (outputDir == "not found"){ outputDir = ""; }

			//if the user changes the input directory command factory will send this info to us in the output parameter
			string inputDir = validParameter.validFile(parameters, "inputdir", false);
			if (inputDir == "not found"){ inputDir = ""; }

			fastaFileName = validParameter.validFile(parameters, "fasta", false);
			if (fastaFileName == "not found") {
				//if there is a current fasta file, use it
				//NOTE(review): the outer filename is not changed in this branch, so unless the
				//output parameter is given it keeps whatever value it had before - confirm this is intended
				string currentFasta = m->getFastaFile();
				if (currentFasta != "") {
					fastaFileNames.push_back(currentFasta);
					m->mothurOut("Using " + currentFasta + " as input file for the fasta parameter."); m->mothurOutEndLine();
				}
				else {
					m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine();
					abort = true;
				}
			}else {
				m->splitAtDash(fastaFileName, fastaFileNames);

				//go through files and make sure they are good, if not, then disregard them
				for (int i = 0; i < fastaFileNames.size(); i++) {

					bool ignore = false;
					if (fastaFileNames[i] == "current") {
						fastaFileNames[i] = m->getFastaFile();
						if (fastaFileNames[i] != "") {
							//the root name is added to filename below, once the file has been opened;
							//adding it here as well put it into the output name twice, and left it
							//there even when the file ended up being disregarded
							m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine();
						}
						else {
							m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine();
							ignore=true;
							//erase from file list
							fastaFileNames.erase(fastaFileNames.begin()+i);
							i--;
						}
					}

					if (!ignore) {
						if (inputDir != "") {
							string path = m->hasPath(fastaFileNames[i]);
							//if the user has not given a path then, add inputdir. else leave path alone.
							if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
						}

						ifstream in;
						bool ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");

						//if you can't open it, try default location
						if (!ableToOpen) {
							if (m->getDefaultPath() != "") { //default path is set
								string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
								m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
								ifstream in2;
								ableToOpen = m->openInputFile(tryPath, in2, "noerror");
								in2.close();
								fastaFileNames[i] = tryPath;
							}
						}

						//if you still can't open it, try the output directory
						if (!ableToOpen) {
							if (m->getOutputDir() != "") { //output directory is set
								string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
								m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
								ifstream in2;
								ableToOpen = m->openInputFile(tryPath, in2, "noerror");
								in2.close();
								fastaFileNames[i] = tryPath;
							}
						}

						in.close();

						if (!ableToOpen) {
							m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
							//erase from file list
							fastaFileNames.erase(fastaFileNames.begin()+i);
							i--;
						}else{
							filename += m->getRootName(m->getSimpleName(fastaFileNames[i]));
							m->setFastaFile(fastaFileNames[i]);
						}
					}
				}

				//prevent gigantic file name
				map<string, string> variables;
				variables["[filename]"] = filename;
				if (fastaFileNames.size() > 3) { variables["[filename]"] = "merge"; }
				filename = getOutputFileName("group",variables);

				//make sure there is at least one valid file left
				if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
			}

			//an explicit output parameter replaces the generated file name
			output = validParameter.validFile(parameters, "output", false);
			if (output == "not found") { output = ""; }
			else{ filename = output; }

			groups = validParameter.validFile(parameters, "groups", false);
			if (groups == "not found") { m->mothurOut("groups is a required parameter for the make.group command."); m->mothurOutEndLine(); abort = true; }
			else { m->splitAtDash(groups, groupsNames); }

			if (groupsNames.size() != fastaFileNames.size()) { m->mothurOut("You do not have the same number of valid fastfile files as groups.  This could be because we could not open a fastafile."); m->mothurOutEndLine(); abort = true; }
		}
	}
	catch(exception& e) {
		m->errorOut(e, "MakeGroupCommand", "MakeGroupCommand");
		exit(1);
	}
}