void twoBitToFa(char *inName, char *outName) /* twoBitToFa - Convert all or part of twoBit file to fasta. */ { struct twoBitFile *tbf; FILE *outFile = mustOpen(outName, "w"); struct twoBitSpec *tbs; if (clSeq != NULL) { char seqSpec[2*PATH_LEN]; if (clEnd > clStart) safef(seqSpec, sizeof(seqSpec), "%s:%s:%d-%d", inName, clSeq, clStart, clEnd); else safef(seqSpec, sizeof(seqSpec), "%s:%s", inName, clSeq); tbs = twoBitSpecNew(seqSpec); } else if (clSeqList != NULL) tbs = twoBitSpecNewFile(inName, clSeqList); else tbs = twoBitSpecNew(inName); if (tbs == NULL) errAbort("%s is not a twoBit file", inName); if (tbs->seqs != NULL && clBpt != NULL) tbf = twoBitOpenExternalBptIndex(tbs->fileName, clBpt); else tbf = twoBitOpen(tbs->fileName); if (clBed != NULL) { processSeqsFromBed(tbf, clBed, outFile); } else { if (tbs->seqs == NULL) processAllSeqs(tbf, outFile); else processSeqSpecs(tbf, tbs->seqs, outFile); } twoBitSpecFree(&tbs); carefulClose(&outFile); twoBitClose(&tbf); }
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName,
        boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index.
 *   fileCount, files - the query files.  Each one is handled as a .nib
 *                      file if nibIsFile() says so, as a twoBit spec if
 *                      twoBitIsSpec() says so, and is otherwise read with
 *                      faMixedSpeedReadNext().
 *   gf               - index that every sequence is searched against.
 *   outName          - not used by this function.
 *   isProt           - passed through to searchOneMaskTrim(); .nib and
 *                      twoBit inputs are rejected with errAbort when set.
 *   maskHash         - passed through to searchOneMaskTrim().
 *   outFile          - passed to gfOutputHead() and searchOneMaskTrim();
 *                      closed with carefulClose() before returning.
 *   showStatus       - when set, the base and sequence totals are printed
 *                      to stdout at the end. */
{
int i;
char *fileName;
int count = 0;
long long totalSize = 0;

gfOutputHead(gvo, outFile);
for (i=0; i<fileCount; ++i)
    {
    fileName = files[i];
    if (nibIsFile(fileName))
        {
        struct dnaSeq *seq;

        if (isProt)
            errAbort("%s: Can't use .nib files with -prot or d=prot option\n", fileName);
        seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
        /* Replace the loaded name with the file name. */
        freez(&seq->name);
        seq->name = cloneString(fileName);
        searchOneMaskTrim(seq, isProt, gf, outFile, maskHash, &totalSize, &count);
        freeDnaSeq(&seq);
        }
    else if (twoBitIsSpec(fileName))
        {
        struct twoBitSpec *tbs = twoBitSpecNew(fileName);
        struct twoBitFile *tbf = twoBitOpen(tbs->fileName);

        if (isProt)
            errAbort("%s is a two bit file, which doesn't work for proteins.", fileName);
        if (tbs->seqs != NULL)
            {
            /* The spec names particular sequences: search just those. */
            struct twoBitSeqSpec *ss = NULL;
            for (ss = tbs->seqs; ss != NULL; ss = ss->next)
                {
                struct dnaSeq *seq = twoBitReadSeqFrag(tbf, ss->name, ss->start, ss->end);
                searchOneMaskTrim(seq, isProt, gf, outFile, maskHash, &totalSize, &count);
                dnaSeqFree(&seq);
                }
            }
        else
            {
            /* No sequences named: walk every entry of the file's index. */
            struct twoBitIndex *index = NULL;
            for (index = tbf->indexList; index != NULL; index = index->next)
                {
                struct dnaSeq *seq = twoBitReadSeqFrag(tbf, index->name, 0, 0);
                searchOneMaskTrim(seq, isProt, gf, outFile, maskHash, &totalSize, &count);
                dnaSeqFree(&seq);
                }
            }
        twoBitClose(&tbf);
        /* Release the spec too; without this one spec was leaked for
         * every twoBit query file. */
        twoBitSpecFree(&tbs);
        }
    else
        {
        /* The reader fills in dna/size/name of this one struct on every
         * call.  NOTE(review): nothing is freed here, so the buffers are
         * presumably owned by the reader - confirm. */
        static struct dnaSeq seq;
        struct lineFile *lf = lineFileOpen(fileName, TRUE);
        while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
            {
            searchOneMaskTrim(&seq, isProt, gf, outFile, maskHash, &totalSize, &count);
            }
        lineFileClose(&lf);
        }
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", totalSize, count);
}