struct hash *qacReadToHash(char *fileName) /* Read in a qac file into a hash of qacs keyed by name. */ { boolean isSwapped; FILE *f = qacOpenVerify(fileName, &isSwapped); bits32 compSize, uncSize; struct qac *qac; char *name; struct hash *hash = newHash(18); int count = 0; for (;;) { name = readString(f); if (name == NULL) break; mustReadOne(f, uncSize); if (isSwapped) uncSize = byteSwap32(uncSize); mustReadOne(f, compSize); if (isSwapped) compSize = byteSwap32(compSize); qac = needHugeMem(sizeof(*qac) + compSize - 1); qac->uncSize = uncSize; qac->compSize = compSize; mustRead(f, qac->data, compSize); hashAdd(hash, name, qac); ++count; } carefulClose(&f); printf("Read %d qacs from %s\n", count, fileName); return hash; }
struct qaSeq *qacReadNext(FILE *f, boolean isSwapped)
/* Read the record at the current position of an open .qac file.
 * Returns a newly allocated qaSeq whose name is the string read from the
 * file and whose qa buffer holds the run-length-uncompressed data, or NULL
 * when readString() finds no further record.  When isSwapped is set the two
 * 32-bit size fields are byte-swapped after reading. */
{
    bits32 packedLen, fullLen;
    signed char *packed;
    struct qaSeq *seq;
    char *recName = readString(f);

    if (recName == NULL)
        return NULL;

    AllocVar(seq);
    seq->name = recName;

    /* Uncompressed size comes first, then compressed size. */
    mustReadOne(f, fullLen);
    if (isSwapped)
        fullLen = byteSwap32(fullLen);
    mustReadOne(f, packedLen);
    if (isSwapped)
        packedLen = byteSwap32(packedLen);

    seq->size = fullLen;
    seq->qa = needLargeMem(fullLen);

    /* Pull the compressed bytes into a scratch buffer, expand them into the
     * result, then drop the scratch buffer. */
    packed = needLargeMem(packedLen);
    mustRead(f, packed, packedLen);
    rleUncompress(packed, packedLen, seq->qa, fullLen);
    freeMem(packed);

    return seq;
}
void mapReadHead(FILE *f, bits32 *width, bits32 *height, char **bac, int *trim, char **repeatMask)
/* Read the header at the start of a map file into the caller's variables.
 * Calls errAbort with "Bad map file" when the leading signature is not
 * gluMapSig.  Fields are consumed in file order: width, height, bac string,
 * trim, repeatMask string.  The two strings come from mapReadString(). */
{
    bits32 magic;

    mustReadOne(f, magic);
    if (magic != gluMapSig)
        errAbort("Bad map file");

    mustReadOne(f, *width);
    mustReadOne(f, *height);
    mapReadString(f, bac);
    mustReadOne(f, *trim);
    mapReadString(f, repeatMask);
}
void mapReadBox(FILE *f, bits16 *mt, int *x, int *y, int *width, int *height, char **bac, bits16 *contig, int *qStart, int *qSize, int *tStart, int *tSize)
/* Read one hit box record from a map file into the caller's variables.
 * Every field is read with mustReadOne() at its native size, except bac
 * which is read by mapReadString().  The order below is the on-disk order
 * and must not be rearranged. */
{
    /* Box type and geometry. */
    mustReadOne(f, *mt);
    mustReadOne(f, *x);
    mustReadOne(f, *y);
    mustReadOne(f, *width);
    mustReadOne(f, *height);

    /* Name string and contig number. */
    mapReadString(f, bac);
    mustReadOne(f, *contig);

    /* Query and target start/size pairs. */
    mustReadOne(f, *qStart);
    mustReadOne(f, *qSize);
    mustReadOne(f, *tStart);
    mustReadOne(f, *tSize);
}
void oocMaskCounts(char *oocFile, bits32 *tileCounts, int tileSize, bits32 maxPat)
/* Set tileCounts[t] to maxPat for every tile t listed in oocFile, which
 * effectively masks those tiles out of the index.  Does nothing when oocFile
 * is NULL.  The file starts with a 32-bit signature and a 32-bit oligo size
 * followed by 32-bit tile values; a file whose signature is oocSigSwapped
 * is read with every value byte-swapped.  Aborts on an unknown signature or
 * when the oligo size differs from tileSize. */
{
    bits32 magic, oligoSize, tile;
    boolean swapped = FALSE;
    FILE *in;

    if (oocFile == NULL)
        return;

    in = mustOpen(oocFile, "rb");
    mustReadOne(in, magic);
    mustReadOne(in, oligoSize);

    if (magic == oocSig)
        swapped = FALSE;
    else if (magic == oocSigSwapped) {
        swapped = TRUE;
        oligoSize = byteSwap32(oligoSize);
    } else
        errAbort("Bad signature on %s\n", oocFile);

    if (oligoSize != tileSize)
        errAbort("Oligo size mismatch in %s. Expecting %d got %d\n", oocFile, tileSize, oligoSize);

    /* Remaining file content is a plain run of tile values up to end of file. */
    while (readOne(in, tile)) {
        if (swapped)
            tile = byteSwap32(tile);
        tileCounts[tile] = maxPat;
    }
    fclose(in);
}
static struct wormFeature *scanChromOffsetFile(char *dir, char *suffix, bits32 signature, int nameOffset, char *chromId, int start, int end, int addEnd)
/* Scan a chrom.pgo or chrom.cdo file for names of things that are within
 * range.
 *
 * The file opened is dir + chromId + suffix.  It must begin with the given
 * signature, followed by an entry count and a name size.  Each entry is
 * nameOffset + nameSize bytes: entry[0] and entry[1] are ints giving the
 * feature start and end, the name begins nameOffset bytes into the entry and
 * the byte just before the name is the feature type.
 *
 * Returns the features in file order; addEnd is added to each stored end.
 * Aborts if the path does not fit the local buffer, the file cannot be
 * opened, the signature is wrong or a read comes up short. */
{
    FILE *f;
    char fileName[512];
    int pathLen;
    bits32 sig, nameSize, entryCount;
    int entrySize;
    int *entry;
    char *name;
    bits32 i;
    struct wormFeature *list = NULL, *el;
    char *typePt;
    char typeByte;

    /* Bounded formatting: an over-long dir/chromId/suffix must not overrun
     * the stack buffer. */
    pathLen = snprintf(fileName, sizeof(fileName), "%s%s%s", dir, chromId, suffix);
    if (pathLen < 0 || pathLen >= (int)sizeof(fileName))
        errAbort("File name too long: %s%s%s", dir, chromId, suffix);

    f = mustOpen(fileName, "rb");
    mustReadOne(f, sig);
    if (sig != signature)
        errAbort("Bad signature on %s", fileName);
    mustReadOne(f, entryCount);
    mustReadOne(f, nameSize);

    entrySize = nameSize + nameOffset;
    /* One spare byte beyond the entry.
     * NOTE(review): presumably needMem() zero-fills so this byte terminates
     * the name - confirm. */
    entry = needMem(entrySize + 1);
    name = (char *)entry;
    name += nameOffset;
    typePt = name-1;

    for (i=0; i<entryCount; ++i) {
        mustRead(f, entry, entrySize);
        /* Stops at the first entry starting past the range; this assumes
         * entries are sorted by start - TODO confirm. */
        if (entry[0] > end)
            break;
        if (entry[1] < start)
            continue;
        typeByte = *typePt;
        el = newWormFeature(name, chromId, entry[0], entry[1]+addEnd, typeByte);
        slAddHead(&list, el);
    }
    slReverse(&list);
    fclose(f);
    freeMem(entry);
    return list;
}
int nt4BaseCount(char *fileName)
/* Return the base count stored in an NT4 file: the 32-bit value that
 * follows the part consumed by nt4OpenVerify().  The file is opened and
 * closed within this call. */
{
    FILE *in = nt4OpenVerify(fileName);
    bits32 baseCount;

    mustReadOne(in, baseCount);
    fclose(in);
    return (int)baseCount;
}
FILE *xaIxOpenVerify(char *fileName)
/* Open fileName for binary reading and check that it starts with the xa
 * index signature xaoSig.  Aborts with "Bad signature" otherwise.  The
 * returned stream is positioned just past the signature. */
{
    bits32 magic;
    FILE *in = mustOpen(fileName, "rb");

    mustReadOne(in, magic);
    if (magic != xaoSig)
        errAbort("Bad signature on %s", fileName);
    return in;
}
struct gdfGene *gdfReadOneGene(FILE *f)
/* Read one entry from a Gdf file.  Assumes that the file pointer
 * is in the right place.
 *
 * Layout as consumed here: a UBYTE name length, that many name bytes with
 * no terminator, a UBYTE chromosome index, a char strand, a short point
 * count, then pointCount data points.  The gene is created by newGdfGene()
 * with pointCount>>1 as its count argument and its dataPoints are then
 * filled from the file.
 *
 * Aborts on a short read, on a name that does not fit the local buffer and
 * on a negative point count. */
{
    short pointCount;
    char strand;
    UBYTE geneNameSize, chromIx;
    /* Sized so any length representable in an 8-bit UBYTE plus terminator
     * fits; the explicit check below still guards the copy should UBYTE be
     * wider than 8 bits. */
    char geneNameBuf[256];
    struct gdfGene *gene;

    mustReadOne(f, geneNameSize);
    if (geneNameSize >= sizeof(geneNameBuf))
        errAbort("Gene name too long (%d bytes) in gdf file", (int)geneNameSize);
    mustRead(f, geneNameBuf, geneNameSize);
    geneNameBuf[geneNameSize] = 0;

    mustReadOne(f, chromIx);
    mustReadOne(f, strand);
    mustReadOne(f, pointCount);
    /* A negative count would turn into an enormous byte count in the read
     * below, so reject it up front. */
    if (pointCount < 0)
        errAbort("Negative point count %d in gdf file", (int)pointCount);

    gene = newGdfGene(geneNameBuf, geneNameSize, pointCount>>1, strand, chromIx);
    mustRead(f, gene->dataPoints, sizeof(gene->dataPoints[0]) * pointCount);
    return gene;
}
static FILE *nt4OpenVerify(char *fileName) /* Open up an nt4 file and verify signature. * Abort if any problem. */ { FILE *f = mustOpen(fileName, "rb"); bits32 signature; mustReadOne(f, signature); if (signature != nt4Signature) errAbort("%s is not a good Nt4 file", fileName); return f; }
boolean isBigWig(char *fileName)
/* Peek at the first 32-bit word of fileName and report whether it is the
 * bigWig signature, in either byte order.  Aborts if the file cannot be
 * opened or is shorter than four bytes. */
{
    bits32 magic;
    FILE *in = mustOpen(fileName, "rb");

    mustReadOne(in, magic);
    fclose(in);

    if (magic == bigWigSig)
        return TRUE;
    /* Not native order; accept the byte-swapped form as well. */
    return byteSwap32(magic) == bigWigSig;
}
struct cdaAli *wormCdaAlisInRange(char *chromId, int start, int end) /* Return list of cdna alignments that overlap range. */ { struct cdaAli *list = NULL, *el; char fileName[512]; FILE *ixFile, *aliFile; bits32 sig; int s, e; long fpos; aliFile = wormOpenGoodAli(); sprintf(fileName, "%s%s.alx", cdnaDir, chromId); ixFile = mustOpen(fileName, "rb"); mustReadOne(ixFile, sig); if (sig != alxSig) errAbort("Bad signature on %s", fileName); for (;;) { if (!readOne(ixFile, s)) break; mustReadOne(ixFile, e); mustReadOne(ixFile, fpos); if (e <= start) continue; if (s >= end) break; AllocVar(el); fseek(aliFile, fpos, SEEK_SET); el = cdaLoadOne(aliFile); if (el == NULL) errAbort("Truncated cdnaAli file"); slAddHead(&list, el); } slReverse(&list); fclose(aliFile); fclose(ixFile); return list; }
FILE *qacOpenVerify(char *fileName, boolean *retIsSwapped)
/* Open fileName for binary reading and check its leading 32-bit signature.
 * Sets *retIsSwapped to FALSE when the signature is qacSig and to TRUE when
 * it is caqSig; any other value aborts.  Returns the stream positioned just
 * after the signature. */
{
    bits32 magic;
    FILE *in = mustOpen(fileName, "rb");

    mustReadOne(in, magic);
    if (magic == qacSig)
        *retIsSwapped = FALSE;
    else if (magic == caqSig)
        *retIsSwapped = TRUE;
    else
        errAbort("%s is not a good .qac file", fileName);
    return in;
}
void mapReadString(FILE *f, char **ps)
/* Read a length-prefixed string from a map file.  The prefix is a short;
 * a zero length stores NULL in *ps, otherwise *ps receives a buffer from
 * needMem() of length+1 bytes whose first length bytes come from the file.
 * NOTE(review): the extra byte is never written here, so termination
 * presumably relies on needMem() zero-filling - confirm. */
{
    short len;
    char *text = NULL;

    mustReadOne(f, len);
    if (len != 0) {
        text = needMem(len+1);
        mustRead(f, text, len);
    }
    *ps = text;
}
struct nt4Seq *loadNt4(char *fileName, char *seqName)
/* Load an nt4 sequence from fileName.  The sequence is named seqName, or
 * fileName when seqName is NULL.  After the verified signature the file
 * holds a 32-bit base count followed by bits32PaddedSize(count) bytes of
 * packed bases, which are read into the bases buffer of the sequence
 * returned by allocNt4(). */
{
    FILE *in = nt4OpenVerify(fileName);
    struct nt4Seq *result;
    bits32 baseCount;

    mustReadOne(in, baseCount);
    if (seqName == NULL)
        seqName = fileName;
    result = allocNt4(baseCount, seqName);
    mustRead(in, result->bases, bits32PaddedSize(baseCount));
    carefulClose(&in);
    return result;
}
struct xaAli *xaRdRange(FILE *ix, FILE *data, int start, int end, boolean condensed)
/* Return the list of xaAlis overlapping start..end, in index order.
 * Both ix and data must already be open.  The condensed flag is passed
 * straight through to xaReadNext().
 *
 * The index is rescanned from just past its 32-bit signature on every call.
 * Each index record is (int start, int end, long offset); records whose
 * overlap with the requested range is non-empty are loaded from data at the
 * recorded offset. */
{
    struct xaAli *hits = NULL;
    int recStart, recEnd;
    long dataPos;

    /* Skip the signature word at the head of the index. */
    fseek(ix, sizeof(bits32), SEEK_SET);

    /* A failed read of the first field is the normal end of the index. */
    while (readOne(ix, recStart)) {
        int overlapStart, overlapEnd;
        struct xaAli *hit;

        mustReadOne(ix, recEnd);
        mustReadOne(ix, dataPos);

        /* Stops at the first record starting at or past the range end;
         * assumes the index is sorted by start - TODO confirm. */
        if (recStart >= end)
            break;

        overlapStart = max(recStart, start);
        overlapEnd = min(recEnd, end);
        if (overlapEnd - overlapStart > 0) {
            fseek(data, dataPos, SEEK_SET);
            hit = xaReadNext(data, condensed);
            slAddHead(&hits, hit);
        }
    }
    slReverse(&hits);
    return hits;
}
static void wormCacheSomeGdf(struct wormGdfCache *cache) /* Cache one gene prediction set. */ { if (cache->snof == NULL) { char fileName[512]; char *dir; bits32 sig; getDirs(); dir = *(cache->pDir); sprintf(fileName, "%sgenes", dir); cache->snof = snofMustOpen(fileName); sprintf(fileName, "%sgenes.gdf", dir); cache->file = mustOpen(fileName, "rb"); mustReadOne(cache->file, sig); if (sig != glSig) errAbort("%s is not a good file", fileName); } }
long incCounterFile(char *fileName)
/* Increment the binary counter stored in fileName and return the new value.
 * The value on disk is a raw long (sizeof(long) bytes), read and written in
 * native byte order.  If the file cannot be opened for update it is created
 * and the counter starts from zero, so the first value returned is 1.  If
 * the file cannot be created either, the incremented value is returned
 * without being stored.  Aborts on a short read of an existing file or when
 * fclose() fails. */
{
    long counter = 0;
    FILE *fp = fopen(fileName, "r+b");

    if (fp == NULL)
        fp = fopen(fileName, "wb");
    else {
        /* Existing file: fetch the old value, then go back to the start so
         * the new value overwrites it. */
        mustReadOne(fp, counter);
        rewind(fp);
    }

    ++counter;

    if (fp != NULL) {
        fwrite(&counter, sizeof(counter), 1, fp);
        if (fclose(fp) != 0)
            errnoAbort("fclose failed");
    }
    return counter;
}
struct patSpace *makePatSpace(struct dnaSeq **seqArray, int arraySize, char *oocFileName) /* Allocate a pattern space and fill from sequences. (Each element of seqArray is a list of sequences. */ { struct patSpace *ps = newPatSpace(); int i; int startIx = 0; int total = 0; long startTime, endTime; struct dnaSeq *seq; int globalOver = 0, localOver = 0; bits16 maxPat; bits16 *listSizes; startTime = clock1000(); maxPat = ps->maxPat = maxPatCount; for (i=0; i<arraySize; ++i) { for (seq = seqArray[i]; seq != NULL; seq = seq->next) { total += seq->size; countPatSpace(seq, ps); } } endTime = clock1000(); printf("%4.2f seconds to countPatSpace %d bases\n", 0.001*(endTime-startTime), total ); listSizes = ps->listSizes; /* Scan through over-popular patterns and set their count to value * where they won't be added to pat space. */ { bits32 sig, psz; FILE *f = mustOpen(oocFileName, "rb"); bits32 oli; mustReadOne(f, sig); mustReadOne(f, psz); if (sig != oocSig) errAbort("Bad signature on %s\n", oocFileName); if (psz != patSize) errAbort("Oligo size mismatch in %s. Expecting %d got %d\n", oocFileName, patSize, psz); while (readOne(f, oli)) listSizes[oli] = maxPat; fclose(f); } startTime = clock1000(); allocPatSpaceLists(ps); endTime = clock1000(); printf("%4.2f seconds to allocPatSpaceLists\n", 0.001*(endTime-startTime) ); startTime = clock1000(); /* Zero out pattern counts that aren't oversubscribed. */ for (i=0; i<patSpaceSize; ++i) { if (listSizes[i] < maxPat) listSizes[i] = 0; } if (dumpMe) fprintf(dumpOut, "BlockIx vs. Seq position table:\n"); for (i=0; i<arraySize; ++i) { int j; for (seq = seqArray[i], j=0; seq != NULL; seq = seq->next, ++j) { startIx = addToPatSpace(i, j, seq, startIx, ps); if (startIx >= maxBlockCount) errAbort("Too many blocks, can only handle %d\n", maxBlockCount); } } ps->blocksUsed = startIx; if (dumpMe) fprintf(dumpOut, "\n"); printf("%d blocks of %d used\n", ps->blocksUsed, maxBlockCount); /* Zero local over-popular patterns. 
*/ for (i=0; i<patSpaceSize; ++i) { if (listSizes[i] >= maxPat) listSizes[i] = 0; } endTime = clock1000(); printf("%4.2f seconds to addToPatSpace\n", 0.001*(endTime-startTime) ); return ps; }