void compute_confusion_matrix(int32_t max_read_length, std::string bam_file, std::string fasta_file, std::string fasta_dir, bool skip_soft_clipped, std::ostream& out){ BamTools::BamReader bam_reader; if (!bam_reader.Open(bam_file)) printErrorAndDie("Failed to open BAM file"); std::string ref_seq; int32_t ref_id; if (fasta_file.compare("N/A") == 0) ref_id = -2; else { readFasta(fasta_file, fasta_dir, ref_seq); ref_id = 0; } int32_t* matrix_counts = new int32_t [25*max_read_length](); int32_t* total_counts = new int32_t [5*max_read_length](); int32_t forward = 0, backward = 0; process_reads(bam_reader, max_read_length, ref_id, ref_seq, fasta_dir, skip_soft_clipped, matrix_counts, total_counts, forward, backward); out << forward << "\n" << backward << std::endl; print_confusion_matrix(matrix_counts, total_counts, max_read_length, out); delete [] matrix_counts; delete [] total_counts; }
int ListSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read functions fill names vector if (fastafile != "") { inputFileName = fastafile; readFasta(); } else if (fastqfile != "") { inputFileName = fastqfile; readFastq(); } else if (namefile != "") { inputFileName = namefile; readName(); } else if (groupfile != "") { inputFileName = groupfile; readGroup(); } else if (alignfile != "") { inputFileName = alignfile; readAlign(); } else if (listfile != "") { inputFileName = listfile; readList(); } else if (taxfile != "") { inputFileName = taxfile; readTax(); } else if (countfile != "") { inputFileName = countfile; readCount(); } if (m->getControl_pressed()) { outputTypes.clear(); return 0; } //sort in alphabetical order sort(names.begin(), names.end()); if (outputDir == "") { outputDir += util.hasPath(inputFileName); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputFileName)); string outputFileName = getOutputFileName("accnos", variables); util.printAccnos(outputFileName, names); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFileName); return 0; } current->setAccnosFile(outputFileName); m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "execute"); exit(1); } }
int main(int argc, char *argv[]) /* hash snpFasta, read through chrN_snpTmp, rewrite with extensions to individual chrom tables */ { struct slName *chromList, *chromPtr; char tableName[64]; if (argc != 2) usage(); snpDb = argv[1]; hSetDb(snpDb); chromList = hAllChromNamesDb(snpDb); errorFileHandle = mustOpen("snpMoltype.errors", "w"); multiFastaHash = readFasta("chrMulti"); for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next) { safef(tableName, ArraySize(tableName), "%s_snpTmp", chromPtr->name); if (!hTableExists(tableName)) continue; verbose(1, "chrom = %s\n", chromPtr->name); chromFastaHash = readFasta(chromPtr->name); processSnps(chromPtr->name); } carefulClose(&errorFileHandle); for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next) { safef(tableName, ArraySize(tableName), "%s_snpTmp", chromPtr->name); if (!hTableExists(tableName)) continue; recreateDatabaseTable(chromPtr->name); verbose(1, "loading chrom = %s\n", chromPtr->name); loadDatabase(chromPtr->name); } return 0; }
/*
 * Program entry point: parse the command line, load the FASTA named there,
 * run multialign() with the requested k-mer size and minimum diagonal size,
 * print the result and release it. Always returns EXIT_SUCCESS.
 */
int main(int argc, char **argv){
    Parameters param = readParams(argc, argv);
    MultiFasta *fasta = readFasta(param.fastaname);

    multialign(fasta, param.kmersize, param.mindiagsize);
    printFasta(fasta);

    releaseFasta(fasta);
    return EXIT_SUCCESS;
}
int do_file(char *fname, struct trie *root, int which){ FILE *fasta; char *genome; int glen; int i; char *window; char *rcwindow; int willInsert; char dummy[SEQ_LENGTH]; int win = 0; willInsert = 1; fprintf(stderr, "\n\nInserting sequence from file %s\n", fname); fasta = myfopen(fname, "r"); fgets(dummy, SEQ_LENGTH, fasta); genome = readFasta(fasta, &glen); window = (char *) mymalloc(sizeof(char) * WINDOW_SIZE+1); rcwindow = (char *) mymalloc(sizeof(char) * WINDOW_SIZE+1); i = 0; while (i < glen - WINDOW_SIZE){ if (getWindow(genome, glen, &window, i) == 0 ){ i+=SLIDE_SIZE; continue; } //while (getIndexWindow(fasta, &window, &willInsert) > 0){ //fprintf(stderr, "\rGetting windows from %s\t: %d%%", fname, ((int)(100.0*(float)(i+strlen(window))/(float)strlen(genome)))); if (VERBOSE) printf("%s\n", window); /* insert the window to the trie here */ if (willInsert){ // start location is i+1 // because i starts from 0 here; but // we want the sequence index starting from 1 // STARTING POSITION FOR FORWARD rcomp(window, rcwindow); if (NOREV){ if (strcmp(window, rcwindow) < 0 ) insert(root, window, (i+1), fname, which); else if (strcmp(window, rcwindow) > 0 ) insert(root, rcwindow, (-1*(i+1)), fname, which); } else{ insert(root, window, (i+1), fname, which); insert(root, rcwindow, (-1*(i+1)), fname, which); } /* insert(root, window, (i+1), fname, which); if (!NOREV){ // reverse complement of this window is indexed by its negative // value ENDING POSITION FOR REVERSE COMPLEMENT rcomp(window, rcwindow); if (strcmp(window, rcwindow)) insert(root, rcwindow, (-1*(i+1)), fname, which); }*/ } i+=SLIDE_SIZE; if (i % 10000 == 0) fprintf(stderr,"\r [%i] of [%d]", i, glen); } fprintf(stderr, "\n"); //free(genome); fclose(fasta); free(window); free(rcwindow); return 1; }
/*
 * Runs the four steps of this routine in a fixed order:
 * readFasta(), proteinMap(), calcuPro(), proteinRela().
 * NOTE(review): the steps take no arguments, so they presumably communicate
 * through shared state -- confirm before reordering.
 */
void proteinInfer()
{
    readFasta();
    proteinMap();
    calcuPro();
    proteinRela();
}
int GetSeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //get names you want to keep names = m->readAccnos(accnosfile); if (m->control_pressed) { return 0; } if (countfile != "") { if ((fastafile != "") || (listfile != "") || (taxfile != "")) { m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } } //read through the correct file and output lines you want to keep if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (fastqfile != "") { readFastq(); } if (groupfile != "") { readGroup(); } if (countfile != "") { readCount(); } if (alignfile != "") { readAlign(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (qualfile != "") { readQual(); } if (accnosfile2 != "") { compareAccnos(); } if (m->debug) { runSanityCheck(); } if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { 
current = (itTypes->second)[0]; m->setListFile(current); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } } return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "execute"); exit(1); } }
int RemoveGroupsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //get groups you want to remove if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all vector<string> namesGroups = groupMap->getNamesOfGroups(); vector<string> checkedGroups; for (int i = 0; i < Groups.size(); i++) { if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); } else { m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); } } if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; } else { Groups = checkedGroups; m->setGroups(Groups); } //fill names with names of sequences that are from the groups we want to remove fillNames(); delete groupMap; }else if (countfile != ""){ if ((fastafile != "") || (listfile != "") || (taxfile != "")) { m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } CountTable ct; ct.readTable(countfile, true, false); if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } vector<string> gNamesOfGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, gNamesOfGroups); vector<string> namesOfSeqs = ct.getNamesOfSeqs(); sort(Groups.begin(), Groups.end()); for (int i = 0; i < namesOfSeqs.size(); i++) { vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]); if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you names.insert(namesOfSeqs[i]); } } } if (m->control_pressed) { return 0; } //read through the correct file and output lines you want to keep if 
(namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (groupfile != "") { readGroup(); } if (countfile != "") { readCount(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } if (designfile != "") { readDesign(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); } } itTypes = outputTypes.find("design"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) 
{ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "execute"); exit(1); } }
int main(int argc, char **argv){ char str[1000]; FILE *fasta; FILE *hor; FILE *log; FILE *aligned; int i,j; int count; int nseq, nhor; int *freqs; int swapfreq; int mismatch; int trim; /* pattern match stuff */ FILE *clusterFile; char cfbase[100]; struct cluster *allclusters; struct cluster *prevcluster; struct cluster *current; struct cmember *cm; struct wgsread *allreads; struct wgsread *wgs1, *wgs2; struct asatseq *asat; struct asatseq *patternasat; char *wgsname; int wgsstart, wgsend; int iflag; int isInserted; int cluster_id; int pattern_id; int occurance; int isFINALHOR; /* pattern match stuff ends here */ if (argc != 3 && argc!=4){ fprintf(stderr, "Aligns sequences to the given center star given a two fasta-centroid files.\nThen dumps the alignments\n"); fprintf(stderr, "%s [fasta file][hor fasta] <-trim>\n", argv[0]); exit(0); } trim = 0; fasta = fopen(argv[1], "r"); hor = fopen(argv[2], "r"); if (argc == 4 && !strcmp(argv[3], "-trim")) trim = 1; if (fasta == NULL || hor == NULL) return 0; strcpy(str, argv[1]); for (i=strlen(str)-1;i>=0;i--) if (str[i]=='.'){ str[i] = 0; break; } if (trim) fprintf(stdout, "Will trim.\n"); makevectors(); strcat(str, ".log"); log = fopen(str, "w"); sprintf(str, "center-%s", argv[1]); aligned = fopen(str, "w"); readFasta(fasta, hor, &nseq, &nhor); fprintf(stderr, "%d monomers, and the center are read into memory.\n", nseq); for (i=0;i<nseq;i++){ align(seqs[i], horseqs[0]); /* aligned seq is in Sp, aligned center is in Tp */ fprintf(aligned, ">%s\n", names[i]); fprintf(log, "%s\t%s\n%s\t%s\n", names[i], Sp, hornames[0], Tp); for (j=strlen(Sp)-1;j>=0;j--){ if (!trim) fprintf(aligned, "%c", Sp[j]); else if (Tp[j]!='-') fprintf(aligned, "%c", Sp[j]); if ((strlen(Sp)-j)%60 == 0 && j!=strlen(Sp)-1) fprintf(aligned, "\n"); } fprintf(aligned, "\n"); if (strchr(Tp, '-')!=NULL) fprintf(log, "%s\n", names[i]); fprintf(stderr, "\r%d\tof\t%d", (i+1), nseq); } fprintf(stderr, "\n"); fclose(log); fclose(aligned); return 1; }