void gfiGetSeqName(char *spec, char *name, char *file)
/* Pull the sequence name, and optionally the file name, out of spec.
 * Spec is either something nibIsFile() accepts, or a string of the
 * form "file:sequence".
 *   spec - the specification to take apart (not modified)
 *   name - output buffer for the sequence name; caller must size it
 *   file - output buffer for the file name, or NULL if not wanted
 * Aborts through errAbort() if a non-nib spec contains no colon. */
{
char *colon;

if (nibIsFile(spec))
    {
    /* Nib case: the whole spec is the file; the name is whatever
     * splitPath() writes to its third argument. */
    splitPath(spec, NULL, name, NULL);
    if (file != NULL)
        strcpy(file, spec);
    return;
    }

/* Otherwise the text before the first colon is the file and the
 * text after it is the sequence name. */
colon = strchr(spec, ':');
if (colon == NULL)
    errAbort("Expecting colon in %s", spec);
strcpy(name, colon + 1);
if (file != NULL)
    {
    int prefixLen = colon - spec;
    memcpy(file, spec, prefixLen);
    file[prefixLen] = 0;
    }
}
struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range,
        struct hash *tFileCache, char *tSeqDir, int querySize,
        int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
/* Expand range to cover an additional expansion bases on either side,
 * then load that part of the target sequence and return it.  (Done
 * together because the target size is not known before the file is
 * consulted.)
 *   range           - range to expand; tStart/tEnd are updated in place
 *   tFileCache      - hash keyed by file name holding the open nibInfo or
 *                     twoBitFile; a missing entry is opened and added
 *   tSeqDir         - directory prepended to range->tName
 *   retTotalSeqSize - receives the full size of the target sequence
 *   isRc            - if set, the range is flipped with reverseIntRange()
 *                     before expansion and flipped back after loading, and
 *                     the loaded DNA is reverse-complemented
 * Aborts if the path is not a nib file and contains no colon. */
{
char path[PATH_LEN+256];
struct dnaSeq *seq = NULL;
struct nibInfo *nibEntry = NULL;
struct twoBitFile *twoBit = NULL;
char *seqName = NULL;
int fullSize = 0;
int isNib;

safef(path, sizeof(path), "%s/%s", tSeqDir, range->tName);

/* Decide the file type once, up front: the 2bit branch below chops
 * path at the colon, so the test cannot be repeated afterwards. */
isNib = nibIsFile(path) ? 1 : 0;

/* Step 1: look up (or open and cache) the file, and learn the full
 * size of the target sequence. */
if (isNib)
    {
    nibEntry = hashFindVal(tFileCache, path);
    if (nibEntry == NULL)
        {
        nibEntry = nibInfoNew(path);
        hashAdd(tFileCache, path, nibEntry);
        }
    fullSize = nibEntry->size;
    }
else
    {
    seqName = strchr(path, ':');
    if (seqName == NULL)
        errAbort("No colon in .2bit response from gfServer");
    *seqName++ = 0;     /* path is now just the file; seqName the sequence */
    twoBit = hashFindVal(tFileCache, path);
    if (twoBit == NULL)
        {
        twoBit = twoBitOpen(path);
        hashAdd(tFileCache, path, twoBit);
        }
    fullSize = twoBitSeqSize(twoBit, seqName);
    }

/* Step 2: expand the range, flipping it first for the reverse strand. */
if (isRc)
    reverseIntRange(&range->tStart, &range->tEnd, fullSize);
gfiExpandRange(range, querySize, fullSize, respectFrame, isRc, expansion);

/* Step 3: load the expanded range. */
if (isNib)
    seq = nibLdPart(path, nibEntry->f, fullSize,
                    range->tStart, range->tEnd - range->tStart);
else
    seq = twoBitReadSeqFragLower(twoBit, seqName, range->tStart, range->tEnd);

/* Step 4: for the reverse strand, complement the DNA and flip the
 * range coordinates back. */
if (isRc)
    {
    reverseComplement(seq->dna, seq->size);
    reverseIntRange(&range->tStart, &range->tEnd, fullSize);
    }

*retTotalSeqSize = fullSize;
return seq;
}
struct dnaSeq *dnaLoadSingle(char *fileName, int *retStart, int *retEnd, int *retParentSize) /* Return sequence if it's a nib file or 2bit part, NULL otherwise. */ { struct dnaSeq *seq = NULL; unsigned start = 0, end = 0; int parentSize = 0; if (nibIsFile(fileName)) { /* Save offset out of fileName for auto-lifting */ char filePath[PATH_LEN]; char name[PATH_LEN]; nibParseName(0, fileName, filePath, name, &start, &end); if (end != 0) /* It's just a range. */ { FILE *f; int size; nibOpenVerify(filePath, &f, &size); parentSize = size; } seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName); if (end == 0) parentSize = end = seq->size; freez(&seq->name); seq->name = cloneString(name); } else if (twoBitIsRange(fileName)) { /* Save offset out of fileName for auto-lifting */ char *rangeSpec = cloneString(fileName); int start, end; char *file, *seqName; twoBitParseRange(rangeSpec, &file, &seqName, &start, &end); /* Load sequence. */ { struct twoBitFile *tbf = twoBitOpen(file); parentSize = twoBitSeqSize(tbf, seqName); seq = twoBitReadSeqFrag(tbf, seqName, start, end); twoBitClose(&tbf); } if (end == 0) end = seq->size; freez(&rangeSpec); } if (retStart != NULL) *retStart = start; if (retEnd != NULL) *retEnd = end; if (retParentSize != NULL) *retParentSize = parentSize; return seq; }
static void gfFileCacheFreeEl(struct hashEl *el)
/* Release the file info held by one file cache hash element.  The
 * element's name decides how its value is interpreted: a nibInfo if
 * nibIsFile() accepts the name, a twoBitFile otherwise.  The value
 * pointer is cleared afterwards. */
{
if (!nibIsFile(el->name))
    {
    struct twoBitFile *twoBit = el->val;
    twoBitClose(&twoBit);
    }
else
    {
    struct nibInfo *info = el->val;
    nibInfoFree(&info);
    }
el->val = NULL;
}
struct hash *loadGeno(char *genoFile) /* load genome sequences into a hash. This supports the multi-sequence * specs of twoBitLoadAll */ { struct dnaSeq *genos = NULL, *geno; struct hash *genoHash = hashNew(0); if (nibIsFile(genoFile)) genos = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, genoFile); else if (twoBitIsSpec(genoFile)) genos = twoBitLoadAll(genoFile); else genos = faReadDna(genoFile); while ((geno = slPopHead(&genos)) != NULL) { tolowers(geno->dna); hashAdd(genoHash, geno->name, geno); } return genoHash; }
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName,
        boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index.
 *   fileCount  - number of entries in files
 *   files      - query files; each may be a nib file, a 2bit spec, or
 *                anything else, which is read as fasta via lineFile
 *   gf         - index to search against
 *   outName    - not used by this function
 *   isProt     - protein mode; nib and 2bit inputs abort when this is set
 *   maskHash   - passed through to searchOneMaskTrim()
 *   outFile    - output stream; the header is written first and the
 *                stream is CLOSED by this function before it returns
 *   showStatus - if set, print total bases and sequence count to stdout
 * NOTE(review): gvo is not declared in this function; presumably a
 * file-level output handle - confirm it is set up before calling. */
{
int i;
char *fileName;
int count = 0;
long long totalSize = 0;

gfOutputHead(gvo, outFile);
for (i=0; i<fileCount; ++i)
    {
    fileName = files[i];
    if (nibIsFile(fileName))
        {
        struct dnaSeq *seq;

        if (isProt)
            errAbort("%s: Can't use .nib files with -prot or d=prot option\n", fileName);
        seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
        /* Report hits under the file name given on the command line. */
        freez(&seq->name);
        seq->name = cloneString(fileName);
        searchOneMaskTrim(seq, isProt, gf, outFile,
                          maskHash, &totalSize, &count);
        freeDnaSeq(&seq);
        }
    else if (twoBitIsSpec(fileName))
        {
        struct twoBitSpec *tbs = NULL;
        struct twoBitFile *tbf = NULL;

        /* Reject protein mode before acquiring anything, so the abort
         * path does not leave a parsed spec and open file behind. */
        if (isProt)
            errAbort("%s is a two bit file, which doesn't work for proteins.",
                     fileName);
        tbs = twoBitSpecNew(fileName);
        tbf = twoBitOpen(tbs->fileName);
        if (tbs->seqs != NULL)
            {
            /* Spec names particular sequences/ranges: search just those. */
            struct twoBitSeqSpec *ss = NULL;
            for (ss = tbs->seqs; ss != NULL; ss = ss->next)
                {
                struct dnaSeq *seq = twoBitReadSeqFrag(tbf, ss->name,
                                                       ss->start, ss->end);
                searchOneMaskTrim(seq, isProt, gf, outFile,
                                  maskHash, &totalSize, &count);
                dnaSeqFree(&seq);
                }
            }
        else
            {
            /* No sequences listed: walk the file's index and search
             * every sequence in full. */
            struct twoBitIndex *index = NULL;
            for (index = tbf->indexList; index != NULL; index = index->next)
                {
                struct dnaSeq *seq = twoBitReadSeqFrag(tbf, index->name, 0, 0);
                searchOneMaskTrim(seq, isProt, gf, outFile,
                                  maskHash, &totalSize, &count);
                dnaSeqFree(&seq);
                }
            }
        twoBitClose(&tbf);
        /* The parsed spec is no longer needed once its sequences have
         * been searched; free it so it is not leaked per input file. */
        twoBitSpecFree(&tbs);
        }
    else
        {
        /* The reader fills in the fields of this one reused record on
         * each call, so there is nothing to free per sequence here. */
        static struct dnaSeq seq;
        struct lineFile *lf = lineFileOpen(fileName, TRUE);
        while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
            {
            searchOneMaskTrim(&seq, isProt, gf, outFile,
                              maskHash, &totalSize, &count);
            }
        lineFileClose(&lf);
        }
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", totalSize, count);
}