示例#1
0
void segRewind(struct segFile *sf)
/* Reposition an open segment file at offset 0 of its underlying
 * line file.  Aborts with an error message when sf is NULL. */
{
if (!sf)
    errAbort("segment file rewind failed -- file not open");
lineFileSeek(sf->lf, 0, SEEK_SET);
}
示例#2
0
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Asks the index in treeFile for the blocks of tabFile that may hold items
 * overlapping chrom:start-end, re-reads each block one line at a time, and
 * prints to stdout every line whose first three fields (chrom, start, end)
 * intersect the query range.
 * Aborts if a line cannot be read or does not chop into three fields. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    /* Consume lines until the byte count reported by lineFileNext covers the block. */
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);
        /* Chop a private copy so the untouched line is still available for output. */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        /* Blocks may also contain non-overlapping items, so re-check each one. */
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }
/* Release everything acquired above: the block list, the index and the data file. */
slFreeList(&blockList);
crTreeFileClose(&crf);
lineFileClose(&lf);
}
示例#3
0
文件: maf.c 项目: kenongit/sequencing
void mafRewind(struct mafFile *mf)
/* Reposition an open maf file at offset 0 of its underlying
 * line file.  Aborts with an error message when mf is NULL. */
{
if (!mf)
    errAbort("maf file rewind failed -- file not open");
lineFileSeek(mf->lf, 0, SEEK_SET);
}
示例#4
0
void writeMousePartsAsMaf(FILE *f, struct hash *mouseHash,
	char *ratMouseDir, char *mouseChrom,
	int mouseStart, int mouseEnd, int mouseChromSize,
	struct hash *rSizeHash, struct hash *dupeHash)
/* Write out mouse/rat alignments that intersect given region of mouse.
 * This gets a little involved because we need to do random access on
 * the mouse/rat alignment files, which are too big to fit into memory.
 * On disk we have a mouse/rat alignment file for each mouse chromosome,
 * and an index of it.  When we first access a mouse chromosome we load
 * the index for that chromosome into memory, and open the alignment file.
 * We then do a seek and read to load a particular alignment.
 *
 * f          - output stream the maf records are written to.
 * mouseHash  - cache of per-chromosome state, keyed by mouse chromosome name;
 *              a new entry is added here on first use of a chromosome.
 * dupeHash   - names ("chrom.offset") of alignments already written; an
 *              alignment found in this hash is skipped, otherwise it is
 *              added and written.
 * rSizeHash  - integer sizes looked up by the axt's qName.
 * Returns silently when the chromosome cache has no open alignment file.
 * Aborts if an alignment cannot be read at an indexed offset.
 * rPrefix and mPrefix are defined outside this function. */
{
struct mouseChromCache *mcc = NULL;
struct binElement *list = NULL, *el;
char aliName[512];

/* Get cache for this mouse chromosome */
mcc = hashFindVal(mouseHash, mouseChrom);
if (mcc == NULL)
    {
    mcc = newMouseChromCache(mouseChrom, mouseChromSize, ratMouseDir);
    hashAdd(mouseHash, mouseChrom, mcc);
    }
if (mcc->lf == NULL)
    return;

/* Get list of positions and process one axt into a maf for each */
list = binKeeperFindSorted(mcc->bk, mouseStart, mouseEnd);
for (el = list; el != NULL; el = el->next)
    {
    struct axt *axt;
    struct mafAli temp;
    long long *pPos, pos;
    pPos = el->val;
    pos = *pPos;
    /* Bounded formatting: never write past the end of aliName. */
    snprintf(aliName, sizeof(aliName), "%s.%lld", mouseChrom, pos);
    if (!hashLookup(dupeHash, aliName))
        {
        int rChromSize;
        hashAdd(dupeHash, aliName, NULL);
        lineFileSeek(mcc->lf, pos, SEEK_SET);
        axt = axtRead(mcc->lf);
        /* Guard against a failed read before axt->qName is dereferenced. */
        if (axt == NULL)
            errAbort("Couldn't read alignment %s from mouse/rat alignment file", aliName);
        rChromSize = hashIntVal(rSizeHash, axt->qName);
        prefixAxt(axt, rPrefix, mPrefix);
        mafFromAxtTemp(axt, mouseChromSize, rChromSize, &temp);
        mafWriteGood(f, &temp);
        axtFree(&axt);
        }
    }
slFreeList(&list);
}
struct mafAli *mafLoadInRegion2(struct sqlConnection *conn,
    struct sqlConnection *conn2, char *table, char *chrom,
    int start, int end, char *file)
/* Return list of alignments in region.
 * Rows of table overlapping chrom:start-end are fetched through conn; each
 * row gives an offset into a maf file from which one alignment is read.
 * When file is non-NULL every alignment is read from that file and each row
 * must have extFile == 0.  When file is NULL each row must have a non-zero
 * extFile id, which is resolved to a path through conn2.
 * Aborts if those expectations are violated or if an alignment cannot be
 * read at the recorded offset.  The returned list is sorted with mafCmp. */
{
char **row;
unsigned int extFileId = 0;
struct mafAli *maf, *mafList = NULL;
struct mafFile *mf = NULL;
int rowOffset;

if (file != NULL)
    mf = mafOpen(file);

struct sqlResult *sr = hRangeQuery(conn, table, chrom,
    start, end, NULL, &rowOffset);

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct scoredRef ref;
    scoredRefStaticLoad(row + rowOffset, &ref);
    if ((file != NULL) && (ref.extFile != 0))
        errAbort("expect extFile to be zero if file specified\n");
    if ((file == NULL) && (ref.extFile == 0))
        errAbort("expect extFile to be not zero or file specified\n");

    /* Switch maf files only when the row refers to a different external file. */
    if (ref.extFile != extFileId)
        {
        char *path = hExtFileNameC(conn2, "extFile", ref.extFile);
        mafFileFree(&mf);
        mf = mafOpen(path);
        /* The path string belongs to us (kent hdb convention: freeMem the
         * result); release it once the file is open so it is not leaked on
         * every file switch. */
        freeMem(path);
        extFileId = ref.extFile;
        }
    lineFileSeek(mf->lf, ref.offset, SEEK_SET);
    maf = mafNext(mf);
    if (maf == NULL)
        internalErr();
    slAddHead(&mafList, maf);
    }
sqlFreeResult(&sr);
mafFileFree(&mf);
slReverse(&mafList);
/* hRangeQuery may return items out-of-order when bin is used in the query,
 * so sort here in order to avoid trouble at base-level view: */
slSort(&mafList, mafCmp);
return mafList;
}
struct mafAli *axtLoadAsMafInRegion(struct sqlConnection *conn, char *table,
	char *chrom, int start, int end,
	char *tPrefix, char *qPrefix, int tSize,  struct hash *qSizeHash)
/* Return list of alignments in region from axt external file as a maf.
 * Rows of table overlapping chrom:start-end are fetched through conn; each
 * row names an external file id and an offset, from which one axt record is
 * read and converted with mafFromAxt.  The query size passed to mafFromAxt
 * is looked up in qSizeHash by the axt's qName.
 * Aborts if an axt cannot be read at the recorded offset.
 * The list is returned in the order the rows were delivered. */
{
char **row;
unsigned int extFileId = 0;
struct lineFile *lf = NULL;
struct mafAli *maf, *mafList = NULL;
struct axt *axt;
int rowOffset;
struct sqlResult *sr = hRangeQuery(conn, table, chrom,
    start, end, NULL, &rowOffset);

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct scoredRef ref;
    scoredRefStaticLoad(row + rowOffset, &ref);
    /* Switch files only when the row refers to a different external file. */
    if (ref.extFile != extFileId)
        {
        char *path = hExtFileName(sqlGetDatabase(conn),"extFile", ref.extFile);
        /* Close the previously opened file before replacing the handle,
         * otherwise it is leaked each time the external file changes.
         * lf is still NULL on the first switch; the final lineFileClose
         * below already relies on that being accepted. */
        lineFileClose(&lf);
        lf = lineFileOpen(path, TRUE);
        /* The path string belongs to us (kent hdb convention: freeMem the
         * result); release it once the file is open. */
        freeMem(path);
        extFileId = ref.extFile;
        }
    lineFileSeek(lf, ref.offset, SEEK_SET);
    axt = axtRead(lf);
    if (axt == NULL)
        internalErr();
    maf = mafFromAxt(axt, tSize, tPrefix, hashIntVal(qSizeHash, axt->qName), qPrefix);
    axtFree(&axt);
    slAddHead(&mafList, maf);
    }
sqlFreeResult(&sr);
lineFileClose(&lf);
slReverse(&mafList);
return mafList;
}
void scaffoldFaToAgp(char *scaffoldFile)
/* scaffoldFaToAgp - create AGP file, gap file and lift file
 * from scaffold FA file.
 * Makes two passes over scaffoldFile.  The first pass totals the scaffold
 * sizes plus one gap of scaffoldGapSize between consecutive scaffolds to get
 * the size of the combined chromosome.  The second pass writes three files
 * named after the input (same directory and root name, extensions .agp,
 * .gap and .lft).
 * CHROM_NAME, SCAFFOLD_GAP_TYPE, FRAGMENT_GAP_TYPE, scaffoldGapSize,
 * minGapSize and seqGetGap are defined outside this function. */
{
DNA *scaffoldSeq;
char *name;
int size;
struct agpFrag frag;
struct agpGap scaffoldGap, fragGap;

struct lineFile *lf = lineFileOpen(scaffoldFile, TRUE);
char outDir[256], outFile[128], ext[64], outPath[512];
FILE *agpFile = NULL, *gapFile = NULL, *liftFile = NULL;

int fileNumber = 1;      /* sequence number in AGP file */
int start = 0, end = 0;  /* running coordinates on the combined chromosome */
int chromSize = 0;
int scaffoldCount = 0;

int fragSize = 0, gapSize = 0;   /* filled in by seqGetGap */
char *seq;
int seqStart = 0;

/* determine size of "unordered chromosome" that will be constructed.
 * This is needed for the lift file. */
while (faMixedSpeedReadNext(lf, &scaffoldSeq, &size, &name))
    {
    chromSize += size;
    chromSize += scaffoldGapSize;
    scaffoldCount++;
    }
/* do not need the final useless gap */
chromSize -= scaffoldGapSize;
printf("scaffold gap size is %d, total scaffolds: %d\n",
         scaffoldGapSize, scaffoldCount);
printf("chrom size is %d\n", chromSize);

/* initialize fixed fields in AGP frag */
ZeroVar(&frag);
frag.chrom = CHROM_NAME;
frag.type[0] = 'D';   /* draft */
frag.fragStart = 0;   /* always start at beginning of scaffold */
frag.strand[0] = '+';

/* initialize fixed fields in scaffold gap */
ZeroVar(&scaffoldGap);
scaffoldGap.chrom = CHROM_NAME;
scaffoldGap.n[0] = 'N';
scaffoldGap.size = scaffoldGapSize;
scaffoldGap.type = SCAFFOLD_GAP_TYPE;
scaffoldGap.bridge = "no";

/* initialize fixed fields in frag gap */
ZeroVar(&fragGap);
fragGap.chrom = CHROM_NAME;
fragGap.n[0] = 'N';
fragGap.type = FRAGMENT_GAP_TYPE;
fragGap.bridge = "yes";

/* munge file paths: outputs reuse the input's directory and root name */
splitPath(scaffoldFile, outDir, outFile, ext);

sprintf(outPath, "%s%s.agp", outDir, outFile);
agpFile = mustOpen(outPath, "w");
printf("writing %s\n", outPath);

sprintf(outPath, "%s%s.gap", outDir, outFile);
gapFile = mustOpen(outPath, "w");
printf("writing %s\n", outPath);

sprintf(outPath, "%s%s.lft", outDir, outFile);
liftFile = mustOpen(outPath, "w");
printf("writing %s\n", outPath);

/* read in scaffolds from fasta file, and generate
 * the three files */
lineFileSeek(lf, 0, SEEK_SET);   /* back to the start for the second pass */
boolean allDone = FALSE;
/* Prime the loop with the first scaffold; each iteration reads the next one
 * itself so it can tell whether a separating gap is needed. */
allDone = ! faMixedSpeedReadNext(lf, &scaffoldSeq, &size, &name);
while (! allDone)
    {
    end = start + size;

    /* setup AGP frag for the scaffold and write to AGP file */
    frag.frag = name;
    frag.ix = fileNumber++;
    frag.chromStart = start;
    frag.chromEnd = end;
    frag.fragEnd = size;
    agpFragOutput(&frag, agpFile, '\t', '\n');

    /* write lift file entry for this scaffold */
    fprintf(liftFile, "%d\t%s\t%d\t%s\t%d\n",
            start, name, size, CHROM_NAME, chromSize);

    /* write gap file entries for this scaffold */
    seq = scaffoldSeq;
    seqStart = start;
    /* Each successful seqGetGap call reports a fragSize and a gapSize for
     * the position seq; both the pointer and the chromosome coordinate are
     * then advanced past them.
     * NOTE(review): presumably fragSize is the run of sequence before the
     * gap and gapSize the gap length -- confirm against seqGetGap. */
    while (seqGetGap(seq, &fragSize, &gapSize))
        {
        /* only gaps strictly larger than minGapSize are recorded */
        if (gapSize > minGapSize)
            {
            fragGap.size = gapSize;
            fragGap.chromStart = seqStart + fragSize + 1;
            fragGap.chromEnd = fragGap.chromStart + gapSize - 1;
            agpGapOutput(&fragGap, gapFile, '\t', '\n');
            }
        seqStart = seqStart + fragSize + gapSize;
        seq = seq + fragSize + gapSize;
        }

    /* setup AGP gap to separate scaffolds and write to AGP and gap files */
    /* Note: may want to suppress final gap -- not needed as separator */
    start = end + 1;
    end = start + scaffoldGapSize - 1;

    /*	Avoid an extra gap on the end - not needed */
    /* The next scaffold is read here, before the gap is written, so the
     * last scaffold is not followed by a gap record.  This overwrites
     * scaffoldSeq, size and name for the next iteration. */
    allDone = ! faMixedSpeedReadNext(lf, &scaffoldSeq, &size, &name);
    if (allDone)
	break;

    scaffoldGap.ix = fileNumber++;
    scaffoldGap.chromStart = start;
    scaffoldGap.chromEnd = end;
    /* the scaffold-separating gap goes to both the AGP file and the gap file */
    agpGapOutput(&scaffoldGap, agpFile, '\t', '\n');
    agpGapOutput(&scaffoldGap, gapFile, '\t', '\n');

    /* write lift file entry for this gap */
    fprintf(liftFile, "%d\t%s\t%d\t%s\t%d\n",
            start-1, "gap", scaffoldGapSize, CHROM_NAME, chromSize);

    /* the next scaffold's start coordinate is the end coordinate of this gap */
    start = end;

    //freeMem(seq);
    }
carefulClose(&agpFile);
carefulClose(&liftFile);
carefulClose(&gapFile);
lineFileClose(&lf);
}
示例#8
0
void lineFileRewind(struct lineFile *lf)
/* Seek lf back to offset 0 and then reset its lineIx field to zero,
 * so that reading starts over from the top of the file. */
{
lineFileSeek(lf, 0, SEEK_SET);
lf->lineIx = 0;
}