void segRewind(struct segFile *sf)
/* Reposition an open segment file at offset 0 of its underlying lineFile.
 * Reports an error through errAbort when sf is NULL. */
{
if (!sf)
    errAbort("segment file rewind failed -- file not open");

struct lineFile *lf = sf->lf;
lineFileSeek(lf, 0, SEEK_SET);
}
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Asks the index in treeFile for the blocks of tabFile that may overlap
 * chrom:start-end, re-reads each of those blocks line by line, and prints to
 * stdout every line whose first three columns (chrom, start, end) intersect the
 * query range.  Reports an error through errAbort on a short read or on a line
 * that does not have exactly three words. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);

    /* Consume lines until the byte count reported for this block is used up. */
    bits64 sizeUsed = 0;
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);

        /* Parse a private copy so that 'line' is still intact when it is
         * printed below (chopLine presumably splits its argument in place). */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }

/* Release everything acquired above: the block list, the index and the
 * tab file (the latter two were previously leaked/never closed). */
slFreeList(&blockList);
crTreeFileClose(&crf);
lineFileClose(&lf);
}
void mafRewind(struct mafFile *mf)
/* Reposition an open maf file at offset 0 of its underlying lineFile.
 * Reports an error through errAbort when mf is NULL. */
{
if (!mf)
    errAbort("maf file rewind failed -- file not open");

struct lineFile *lf = mf->lf;
lineFileSeek(lf, 0, SEEK_SET);
}
void writeMousePartsAsMaf(FILE *f, struct hash *mouseHash, char *ratMouseDir, char *mouseChrom, int mouseStart, int mouseEnd, int mouseChromSize, struct hash *rSizeHash, struct hash *dupeHash) /* Write out mouse/rat alignments that intersect given region of mouse. * This gets a little involved because we need to do random access on * the mouse/rat alignment files, which are too big to fit into memory. * On disk we have a mouse/rat alignment file for each mouse chromosome, * and an index of it. When we first access a mouse chromosome we load * the index for that chromosome into memory, and open the alignment file. * We then do a seek and read to load a particular alignment. */ { struct mouseChromCache *mcc = NULL; struct binElement *list = NULL, *el; char aliName[512]; /* Get cache for this mouse chromosome */ mcc = hashFindVal(mouseHash, mouseChrom); if (mcc == NULL) { mcc = newMouseChromCache(mouseChrom, mouseChromSize, ratMouseDir); hashAdd(mouseHash, mouseChrom, mcc); } if (mcc->lf == NULL) return; /* Get list of positions and process one axt into a maf for each */ list = binKeeperFindSorted(mcc->bk, mouseStart, mouseEnd); for (el = list; el != NULL; el = el->next) { struct axt *axt; struct mafAli temp; long long *pPos, pos; pPos = el->val; pos = *pPos; sprintf(aliName, "%s.%lld", mouseChrom, pos); if (!hashLookup(dupeHash, aliName)) { int rChromSize; hashAdd(dupeHash, aliName, NULL); lineFileSeek(mcc->lf, pos, SEEK_SET); axt = axtRead(mcc->lf); rChromSize = hashIntVal(rSizeHash, axt->qName); prefixAxt(axt, rPrefix, mPrefix); mafFromAxtTemp(axt, mouseChromSize, rChromSize, &temp); mafWriteGood(f, &temp); axtFree(&axt); } } slFreeList(&list); }
struct mafAli *mafLoadInRegion2(struct sqlConnection *conn, struct sqlConnection *conn2, char *table, char *chrom, int start, int end, char *file) /* Return list of alignments in region. */ { char **row; unsigned int extFileId = 0; struct mafAli *maf, *mafList = NULL; struct mafFile *mf = NULL; int rowOffset; if (file != NULL) mf = mafOpen(file); struct sqlResult *sr = hRangeQuery(conn, table, chrom, start, end, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct scoredRef ref; scoredRefStaticLoad(row + rowOffset, &ref); if ((file != NULL) && (ref.extFile != 0)) errAbort("expect extFile to be zero if file specified\n"); if ((file == NULL) && (ref.extFile == 0)) errAbort("expect extFile to be not zero or file specified\n"); if (ref.extFile != extFileId) { char *path = hExtFileNameC(conn2, "extFile", ref.extFile); mafFileFree(&mf); mf = mafOpen(path); extFileId = ref.extFile; } lineFileSeek(mf->lf, ref.offset, SEEK_SET); maf = mafNext(mf); if (maf == NULL) internalErr(); slAddHead(&mafList, maf); } sqlFreeResult(&sr); mafFileFree(&mf); slReverse(&mafList); /* hRangeQuery may return items out-of-order when bin is used in the query, * so sort here in order to avoid trouble at base-level view: */ slSort(&mafList, mafCmp); return mafList; }
struct mafAli *axtLoadAsMafInRegion(struct sqlConnection *conn, char *table, char *chrom, int start, int end, char *tPrefix, char *qPrefix, int tSize, struct hash *qSizeHash) /* Return list of alignments in region from axt external file as a maf. */ { char **row; unsigned int extFileId = 0; struct lineFile *lf = NULL; struct mafAli *maf, *mafList = NULL; struct axt *axt; int rowOffset; struct sqlResult *sr = hRangeQuery(conn, table, chrom, start, end, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct scoredRef ref; scoredRefStaticLoad(row + rowOffset, &ref); if (ref.extFile != extFileId) { char *path = hExtFileName(sqlGetDatabase(conn),"extFile", ref.extFile); lf = lineFileOpen(path, TRUE); extFileId = ref.extFile; } lineFileSeek(lf, ref.offset, SEEK_SET); axt = axtRead(lf); if (axt == NULL) internalErr(); maf = mafFromAxt(axt, tSize, tPrefix, hashIntVal(qSizeHash, axt->qName), qPrefix); axtFree(&axt); slAddHead(&mafList, maf); } sqlFreeResult(&sr); lineFileClose(&lf); slReverse(&mafList); return mafList; }
void scaffoldFaToAgp(char *scaffoldFile) /* scaffoldFaToAgp - create AGP file, gap file and lift file * from scaffold FA file */ { DNA *scaffoldSeq; char *name; int size; struct agpFrag frag; struct agpGap scaffoldGap, fragGap; struct lineFile *lf = lineFileOpen(scaffoldFile, TRUE); char outDir[256], outFile[128], ext[64], outPath[512]; FILE *agpFile = NULL, *gapFile = NULL, *liftFile = NULL; int fileNumber = 1; /* sequence number in AGP file */ int start = 0, end = 0; int chromSize = 0; int scaffoldCount = 0; int fragSize = 0, gapSize = 0; char *seq; int seqStart = 0; /* determine size of "unordered chromosome" that will be constructed. * This is needed for the lift file. */ while (faMixedSpeedReadNext(lf, &scaffoldSeq, &size, &name)) { chromSize += size; chromSize += scaffoldGapSize; scaffoldCount++; } /* do not need the final useless gap */ chromSize -= scaffoldGapSize; printf("scaffold gap size is %d, total scaffolds: %d\n", scaffoldGapSize, scaffoldCount); printf("chrom size is %d\n", chromSize); /* initialize fixed fields in AGP frag */ ZeroVar(&frag); frag.chrom = CHROM_NAME; frag.type[0] = 'D'; /* draft */ frag.fragStart = 0; /* always start at beginning of scaffold */ frag.strand[0] = '+'; /* initialize fixed fields in scaffold gap */ ZeroVar(&scaffoldGap); scaffoldGap.chrom = CHROM_NAME; scaffoldGap.n[0] = 'N'; scaffoldGap.size = scaffoldGapSize; scaffoldGap.type = SCAFFOLD_GAP_TYPE; scaffoldGap.bridge = "no"; /* initialize fixed fields in frag gap */ ZeroVar(&fragGap); fragGap.chrom = CHROM_NAME; fragGap.n[0] = 'N'; fragGap.type = FRAGMENT_GAP_TYPE; fragGap.bridge = "yes"; /* munge file paths */ splitPath(scaffoldFile, outDir, outFile, ext); sprintf(outPath, "%s%s.agp", outDir, outFile); agpFile = mustOpen(outPath, "w"); printf("writing %s\n", outPath); sprintf(outPath, "%s%s.gap", outDir, outFile); gapFile = mustOpen(outPath, "w"); printf("writing %s\n", outPath); sprintf(outPath, "%s%s.lft", outDir, outFile); liftFile = mustOpen(outPath, "w"); 
printf("writing %s\n", outPath); /* read in scaffolds from fasta file, and generate * the three files */ lineFileSeek(lf, 0, SEEK_SET); boolean allDone = FALSE; allDone = ! faMixedSpeedReadNext(lf, &scaffoldSeq, &size, &name); while (! allDone) { end = start + size; /* setup AGP frag for the scaffold and write to AGP file */ frag.frag = name; frag.ix = fileNumber++; frag.chromStart = start; frag.chromEnd = end; frag.fragEnd = size; agpFragOutput(&frag, agpFile, '\t', '\n'); /* write lift file entry for this scaffold */ fprintf(liftFile, "%d\t%s\t%d\t%s\t%d\n", start, name, size, CHROM_NAME, chromSize); /* write gap file entries for this scaffold */ seq = scaffoldSeq; seqStart = start; while (seqGetGap(seq, &fragSize, &gapSize)) { if (gapSize > minGapSize) { fragGap.size = gapSize; fragGap.chromStart = seqStart + fragSize + 1; fragGap.chromEnd = fragGap.chromStart + gapSize - 1; agpGapOutput(&fragGap, gapFile, '\t', '\n'); } seqStart = seqStart + fragSize + gapSize; seq = seq + fragSize + gapSize; } /* setup AGP gap to separate scaffolds and write to AGP and gap files */ /* Note: may want to suppress final gap -- not needed as separator */ start = end + 1; end = start + scaffoldGapSize - 1; /* Avoid an extra gap on the end - not needed */ allDone = ! faMixedSpeedReadNext(lf, &scaffoldSeq, &size, &name); if (allDone) break; scaffoldGap.ix = fileNumber++; scaffoldGap.chromStart = start; scaffoldGap.chromEnd = end; agpGapOutput(&scaffoldGap, agpFile, '\t', '\n'); agpGapOutput(&scaffoldGap, gapFile, '\t', '\n'); /* write lift file entry for this gap */ fprintf(liftFile, "%d\t%s\t%d\t%s\t%d\n", start-1, "gap", scaffoldGapSize, CHROM_NAME, chromSize); start = end; //freeMem(seq); } carefulClose(&agpFile); carefulClose(&liftFile); carefulClose(&gapFile); lineFileClose(&lf); }
void lineFileRewind(struct lineFile *lf)
/* Seek lf back to offset 0 and reset its line counter to zero. */
{
const int startOfFile = 0;

/* Reposition first; the line index is reset only after the seek returns. */
lineFileSeek(lf, startOfFile, SEEK_SET);
lf->lineIx = startOfFile;
}