static void goldLoad(struct track *tg)
/* Populate a track from the golden path tables for the current window:
 * fragments from "gold" go to tg->items (sorted with cmpAgpFrag) and
 * gaps from "gap" go to tg->customPt (in query order). */
{
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;
char **row;
struct agpFrag *frags = NULL;
struct agpGap *gaps = NULL;
int rowOffset;

/* Fragments overlapping the window. */
sr = hRangeQuery(conn, "gold", chromName, winStart, winEnd, NULL, &rowOffset);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct agpFrag *oneFrag = agpFragLoad(row + rowOffset);
    slAddHead(&frags, oneFrag);
    }
slSort(&frags, cmpAgpFrag);
sqlFreeResult(&sr);
tg->items = frags;

/* Gaps overlapping the window; prepending reverses the query order, so
 * the list is flipped back once everything is loaded. */
sr = hRangeQuery(conn, "gap", chromName, winStart, winEnd, NULL, &rowOffset);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct agpGap *oneGap = agpGapLoad(row + rowOffset);
    slAddHead(&gaps, oneGap);
    }
slReverse(&gaps);
sqlFreeResult(&sr);
tg->customPt = gaps;

hFreeConn(&conn);
}
Пример #2
0
struct agp *agpLoad(char **row, int ct)
/* Build an AGP entry from an array of ct strings.  Column 5 (row[4])
 * starting with 'N' or 'U' marks a gap line; anything else is a
 * fragment line, which must have exactly 9 words.  Aborts on a short
 * line.  Dispose with agpFree */
{
struct agp *result;

if (ct < 8)
    errAbort("Expecting >= 8 words in AGP file, got %d\n", ct);

AllocVar(result);
if (row[4][0] == 'N' || row[4][0] == 'U')
    {
    /* gap line */
    result->entry = agpGapLoad(row);
    result->isFrag = FALSE;
    return result;
    }

/* fragment line */
if (ct != 9)
    errAbort("Expecting 9 words in AGP fragment line, got %d\n", ct);
result->entry = agpFragLoad(row);
result->isFrag = TRUE;
return result;
}
Пример #3
0
static struct agpGap *loadAllGaps(struct sqlConnection *conn,
	char *db, struct chromInfo *cInfoList)
/* Fetch the "gap" table rows for every chromosome in cInfoList and
 * return them as one list sorted with bedCmp.
 *
 * When the global minGap is non-zero only gaps whose size is at least
 * minGap are kept; smaller ones are freed immediately.  Unless the
 * global 'insane' is set, the sorted list is passed to gapSanityCheck.
 * The db parameter is not used by this function. */
{
struct agpGap *gapList = NULL;
struct chromInfo *cInfo;
int gapCount = 0;

for (cInfo = cInfoList; cInfo; cInfo = cInfo->next)
    {
    char **row;
    int rowOffset;
    struct sqlResult *sr = hRangeQuery(conn, "gap", cInfo->chrom, 0,
	cInfo->size, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	struct agpGap *gap = agpGapLoad(row+rowOffset);
	if (!minGap || gap->size >= minGap)
	    {
	    slAddHead(&gapList, gap);
	    ++gapCount;
	    }
	else
	    {
	    /* Too small to keep: nothing else references this record,
	     * so release it here instead of leaking it. */
	    agpGapFree(&gap);
	    }
	}
    sqlFreeResult(&sr);
    }
/* NOTE(review): bedCmp is applied to agpGap records; this presumably
 * relies on both structs starting with the same next/chrom/chromStart
 * fields - confirm against the struct definitions. */
slSort(&gapList, bedCmp);
if (! insane)
    gapSanityCheck(gapList);
verbose(2,"#\tfound %d gaps of size >= %d\n", gapCount, minGap);
return (gapList);
}
Пример #4
0
struct agpData* nextAgpEntryToSplitOn(struct lineFile *lfAgpFile, int dnaSize, struct agpData **retStartData)
/*
Finds the next agp entry in the agp file at which to split on.

Entries are read one line at a time and chained into a doubly linked
list.  Reading stops when a split point has been found and at least
_nSize bases have been read, when an entry's chromEnd reaches dnaSize,
or when the file runs out of lines.  Comment ('#') and blank lines are
skipped.

param lfAgpFile - The .agp file we are examining.
param dnaSize - The total size of the chromosome's dna sequence
 we are splitting up. Used to prevent overrun of the algorithm
 that looks at agp entries.
param retStartData - An out param returning the starting (inclusive) agp
 entry of the section. Set to NULL when no entry could be read.

return struct agpData* - The ending (inclusive) agp data we are to split on,
 or NULL when the file held no further entries.
 */
{
int startIndex = 0;
int numBasesRead = 0;
char *line = NULL;
char *words[9];
int lineSize = 0;
struct agpGap *agpGap = NULL;
struct agpFrag *agpFrag = NULL;
struct agpData *curAgpData = NULL;
struct agpData *prevAgpData = NULL;
boolean splitPointFound = FALSE;
boolean reachedEnd = FALSE;
int splitSize = _nSize;

*retStartData = NULL;

/* Stop cleanly at end of file instead of dereferencing a line that was
 * never read. */
while (lineFileNext(lfAgpFile, &line, &lineSize))
    {
    /* Skipping happens before anything touches curAgpData, so a comment
     * or blank line at the top of the file cannot cause a NULL
     * dereference. */
    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
        continue;

    AllocVar(curAgpData);
    curAgpData->endOfContig = FALSE;
    curAgpData->isGap = FALSE;
    curAgpData->prev = NULL;
    curAgpData->next = NULL;

    /* NOTE(review): the word count returned by chopLine is not checked;
     * a data line with fewer fields than the loaders expect is not
     * detected here. */
    chopLine(line, words);
    if (words[4][0] == 'N' || words[4][0] == 'U')
        {
        agpGap = agpGapLoad(words);
        /*
          Decrement the chromStart index since that's how the agpFrags do it
           and we want to use 0-based addressing
        */
        --(agpGap->chromStart);

        if (0 == startIndex)
            {
            startIndex = agpGap->chromStart;
            }

        if (numBasesRead >= _bSize)
            {
            splitPointFound = TRUE;
            }
        else
            {
            /* Split points are made after non-bridged contigs only here */
            splitPointFound = (0 == strcasecmp(agpGap->bridge, NO));
            }

        curAgpData->isGap = TRUE;
        curAgpData->data.pGap = agpGap;

        /* Progress is taken from the gap record itself. */
        numBasesRead = agpGap->chromEnd - startIndex;
        reachedEnd = !(agpGap->chromEnd < dnaSize);
        }
    else
        {
        agpFrag = agpFragLoad(words);
        /* file is 1-based but agpFragLoad() now assumes 0-based: */
        agpFrag->chromStart -= 1;
        agpFrag->fragStart  -= 1;

        if (0 == startIndex)
            {
            startIndex = agpFrag->chromStart;
            }

        /* The test uses the bases read before this fragment. */
        splitPointFound = (numBasesRead >= _aSize);

        curAgpData->isGap = FALSE;
        curAgpData->data.pFrag = agpFrag;

        /* Read chromEnd through the fragment pointer rather than through
         * the gap member of the union. */
        numBasesRead = agpFrag->chromEnd - startIndex;
        reachedEnd = !(agpFrag->chromEnd < dnaSize);
        }

    if (NULL == prevAgpData)
        {
        /* First entry read: it is the head of the section to write out. */
        *retStartData = curAgpData;
        }
    else
        {
        /* Build a doubly linked list for use elsewhere */
        prevAgpData->next = curAgpData;
        curAgpData->prev = prevAgpData;
        }
    prevAgpData = curAgpData;

    if (numBasesRead >= splitSize && splitPointFound)
        break;
    if (reachedEnd)
        break;
    }

if (curAgpData == NULL)
    return NULL;    /* nothing but comments/blank lines, or an empty file */

curAgpData->next = NULL; /* Terminate the linked list */
curAgpData->endOfContig = TRUE;
return curAgpData;
}
Пример #5
0
static void agpSangerUnfinished(char *agpFile, char *contigFasta, char *agpOut)
/* Fix agp to match unfinished contigs in fasta */
{
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *line, *words[16];
int lineSize, wordCount;
unsigned lastPos = 0;
struct agpFrag *agp;
struct agpGap *gap;
FILE *f;
char *lastObj = NULL;
f = mustOpen(agpOut, "w");
char *newChrom = NULL;
struct hash *hash = hashFasta(contigFasta);

verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
        continue;
    //verbose(2,"#\tline: %d\n", lf->lineIx);
    wordCount = chopLine(line, words);
    if (wordCount < 5)
        errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
		 lf->lineIx, lf->fileName, wordCount);

    if (!lastObj || !sameString(words[0],lastObj))
	{
	freez(&newChrom);
	newChrom = cloneString(words[0]);
	lastPos = 0;
	}

    	
		 
    if (words[4][0] != 'N')
	{
	lineFileExpectAtLeast(lf, 9, wordCount);
	agp = agpFragLoad(words);
	/* agp is 1-based but agp loaders do not adjust for 0-based: */
    	agp->chromStart -= 1;
	agp->fragStart  -= 1;
	if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
	    errAbort("Sizes don't match in %s and %s line %d of %s\n",
		agp->chrom, agp->frag, lf->lineIx, lf->fileName);

	char *root = cloneString(agp->frag);
	chopSuffixAt(root, '.');

	struct hashEl *e, *elist = hashLookup(hash, root);
	for (e = elist; e; e = hashLookupNext(e))
	    {
	    struct unfinishedContig *u = e->val;
            if ((u->fragStart <= agp->fragStart) && (u->fragEnd >= agp->fragEnd))
		{
		agp->frag = cloneString(u->frag);
		agp->fragEnd -= u->fragStart;
		agp->fragStart -= u->fragStart;
		}
	    }
	freeMem(root);
	}
    else
        {
	lineFileExpectAtLeast(lf, 8, wordCount);
	gap = agpGapLoad(words);
	/* to be consistent with agpFrag */
	gap->chromStart -= 1;
	agp = (struct agpFrag*)gap;
	}

    if (agp->chromStart != lastPos)
	errAbort("Start doesn't match previous end line %d of %s\n"
	    "agp->chromStart: %u\n" 
	    "agp->chromEnd: %u\n" 
	    "lastPos: %u\n" 
	    ,lf->lineIx, lf->fileName
	    ,agp->chromStart
	    ,agp->chromEnd
	    ,lastPos
	    );

    lastPos = agp->chromEnd;
    freez(&lastObj);
    lastObj = cloneString(words[0]); /* not agp->chrom which may be modified already */
	
    if (words[4][0] != 'N')
	{
	/* agpFragOutput assumes 0-based-half-open, but writes 1-based for agp */
	agpFragOutput(agp, f, '\t', '\n');
	agpFragFree(&agp);
	}
    else
        {
	/* restore back to 1-based for agp 
	 * because agpGapOutput doesn't compensate */
	gap->chromStart += 1;
	agpGapOutput(gap, f, '\t', '\n');
	agpGapFree(&gap);
	}
	
    }

carefulClose(&f);
}
static void agpMergeChromScaf(char *agpFile, char *agpOut, boolean filtering)
/* Create a combined agp file from the chrom.agp and scaffold.agp,
 *  merging in only scaffolds from scaffold.agp
 *  that are not already in chroms.
 *
 * With filtering FALSE, agpOut is overwritten and every fragment name
 * (with its '.' suffix removed) is remembered in a hash that persists
 * across calls.  With filtering TRUE, agpOut is appended to and every
 * line whose object name (first column) is in that hash is validated
 * but not written.  Lines whose fifth column starts with 'N' are gaps.
 * Aborts on malformed or inconsistent input. */
{
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *line, *words[16];
int lineSize, wordCount;
unsigned lastPos = 0;
char *lastObj = NULL;
FILE *f = mustOpen(agpOut, filtering ? "a" : "w");
static struct hash *hash = NULL;    /* fragment roots seen while not filtering */

if (!hash)
    hash = hashNew(0);

verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
while (lineFileNext(lf, &line, &lineSize))
    {
    struct agpFrag *agp = NULL;
    struct agpGap *gap = NULL;
    unsigned chromStart, chromEnd;
    boolean skipping = FALSE;
    boolean isGap;

    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
        continue;
    wordCount = chopLine(line, words);
    if (wordCount < 5)
        errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
                 lf->lineIx, lf->fileName, wordCount);

    /* A new object restarts the coordinate chain at 0. */
    if (!lastObj || !sameString(words[0],lastObj))
        lastPos = 0;

    if (filtering && hashLookup(hash, words[0]))
        skipping = TRUE;

    isGap = (words[4][0] == 'N');
    if (!isGap)
        {
        lineFileExpectAtLeast(lf, 9, wordCount);
        agp = agpFragLoad(words);
        /* agp is 1-based but agp loaders do not adjust for 0-based: */
        agp->chromStart -= 1;
        agp->fragStart  -= 1;
        if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
            errAbort("Sizes don't match in %s and %s line %d of %s\n",
                agp->chrom, agp->frag, lf->lineIx, lf->fileName);
        if (!filtering)
            {
            char *root = cloneString(agp->frag);
            chopSuffixAt(root, '.');
            hashStore(hash, root);
            freeMem(root);
            }
        chromStart = agp->chromStart;
        chromEnd = agp->chromEnd;
        }
    else
        {
        lineFileExpectAtLeast(lf, 8, wordCount);
        gap = agpGapLoad(words);
        /* to be consistent with agpFrag */
        gap->chromStart -= 1;
        /* Coordinates are copied out so the gap never has to be viewed
         * through a fragment pointer. */
        chromStart = gap->chromStart;
        chromEnd = gap->chromEnd;
        }

    /* Skipped lines are still validated. */
    if (chromStart != lastPos)
        errAbort("Start doesn't match previous end line %d of %s\n"
            "agp->chromStart: %u\n"
            "agp->chromEnd: %u\n"
            "lastPos: %u\n"
            ,lf->lineIx, lf->fileName
            ,chromStart
            ,chromEnd
            ,lastPos
            );

    lastPos = chromEnd;
    freez(&lastObj);
    lastObj = cloneString(words[0]); /* not agp->chrom which may be modified already */

    if (!isGap)
        {
        /* agpFragOutput assumes 0-based-half-open, but writes 1-based for agp */
        if (!skipping)
            agpFragOutput(agp, f, '\t', '\n');
        agpFragFree(&agp);
        }
    else
        {
        /* restore back to 1-based for agp
         * because agpGapOutput doesn't compensate */
        gap->chromStart += 1;
        if (!skipping)
            agpGapOutput(gap, f, '\t', '\n');
        agpGapFree(&gap);
        }
    }

freez(&lastObj);
lineFileClose(&lf);
carefulClose(&f);
}
Пример #7
0
struct hash *agpLoadAll(char *agpFile)
/* load AGP entries into a hash of AGP lists, one per chromosome */
{
struct hash *agpHash = newHash(0);
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *words[9];
int lastPos = 0;
int wordCount;
struct agpFrag *agpFrag;
struct agpGap *agpGap;
char *chrom;
struct agp *agp;
struct hashEl *hel;

while ((wordCount = lineFileChopNext(lf, words, ArraySize(words))) != 0)
    {
    lineFileExpectAtLeast(lf, 8, wordCount);
    chrom = words[0];
    if (!hashFindVal(agpHash, chrom))
        lastPos = 1;
    AllocVar(agp);
    if (words[4][0] != 'N' && words[4][0] != 'U')
        {
        /* not a gap */
        lineFileExpectWords(lf, 9, wordCount);
        agpFrag = agpFragLoad(words);
        if (agpFrag->chromStart != lastPos)
            errAbort(
               "Frag start (%d, %d) doesn't match previous end line %d of %s\n",
                     agpFrag->chromStart, lastPos, lf->lineIx, lf->fileName);
        if (agpFrag->chromEnd - agpFrag->chromStart != 
                        agpFrag->fragEnd - agpFrag->fragStart)
            errAbort("Sizes don't match in %s and %s line %d of %s\n",
                    agpFrag->chrom, agpFrag->frag, lf->lineIx, lf->fileName);
        lastPos = agpFrag->chromEnd + 1;
        agp->entry = agpFrag;
        agp->isFrag = TRUE;
        }
    else
        {
        /* gap */
        lineFileExpectWords(lf, 8, wordCount);
        agpGap = agpGapLoad(words);
        if (agpGap->chromStart != lastPos)
            errAbort("Gap start (%d, %d) doesn't match previous end line %d of %s\n",
                     agpGap->chromStart, lastPos, lf->lineIx, lf->fileName);
        lastPos = agpGap->chromEnd + 1;
        agp->entry = agpGap;
        agp->isFrag = FALSE;
        }
    if ((hel = hashLookup(agpHash, chrom)) == NULL)
        hashAdd(agpHash, chrom, agp);
    else
        slAddHead(&(hel->val), agp);
    }
#ifndef DEBUG
    {
struct hashCookie cookie;
struct hashEl *hel;
cookie = hashFirst(agpHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct agp *agpList;
    agpList = (struct agp *)hel->val;
    /*
    for (agp = agpList; agp != NULL; agp = agp->next)
        printf("isFrag: %d\n", agp->isFrag);
        */
    }
    }
#endif
/* reverse AGP lists */
//hashTraverseVals(agpHash, slReverse);
#ifndef DEBUG
    {
struct hashCookie cookie;
struct hashEl *hel;
cookie = hashFirst(agpHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct agp *agpList;
    slReverse(&hel->val);
    agpList = hel->val;
    /*
    agpList = (struct agp *)hel->val;
    slReverse(&agpList);
    hashRemove(agpHash, hel->name);
    hashAdd(agpHash, hel->name, agpList);
    */
    /*
    for (agp = agpList; agp != NULL; agp = agp->next)
        printf("isFrag: %d\n", agp->isFrag);
        */
    }
    }
#endif
return agpHash;
}
struct chrom *readChromScaffoldsFromAgp(char *fileName)
/* Read in agp file and return as list of chroms.
 *
 * Each returned chrom holds its fragments in file order and a size equal
 * to the largest chromEnd seen among its fragment and gap lines.  Chroms
 * appear in order of first occurrence in the file.  Fragment starts are
 * converted to 0-based.  Gap lines (fifth column starting with 'N' or
 * 'U') contribute only to the size.  Aborts on short lines or when a
 * fragment's size differs between its two coordinate ranges. */
{
struct hash *chromHash = newHash(17);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[9];
int wordCount;
struct chrom *chromList = NULL, *chrom;
char *chromName;
int chromSize = 0;

for (;;)
    {
    struct agpFrag *frag = NULL;
    struct agpGap *gap = NULL;

    wordCount = lineFileChop(lf, row);
    if (wordCount <= 0)
        break;
    if (wordCount < 8)
        lineFileShort(lf);

    if (row[4][0] == 'N' || row[4][0] == 'U')
        {
        /* need to get chromEnd from gaps to determine chrom size
         * if the chrom ends with a gap */
        gap = agpGapLoad(row);
        chromName = gap->chrom;
        chromSize = gap->chromEnd;
        }
    else
        {
        if (wordCount < 9)
            lineFileShort(lf);
        frag = agpFragLoad(row);
        chromName = frag->chrom;
        chromSize = frag->chromEnd;
        frag->chromStart -= 1;
        frag->fragStart -= 1;
        if (frag->fragEnd - frag->fragStart !=
            frag->chromEnd - frag->chromStart)
                errAbort("chrom/scaffold size mismatch line %d of %s",
                                  lf->lineIx, lf->fileName);
        }
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        AllocVar(chrom);
        slAddHead(&chromList, chrom);
        hashAdd(chromHash, chromName, chrom);
        }
    chrom->size = max(chromSize, chrom->size);
    if (frag != NULL)
        slAddHead(&chrom->list, frag);

    /* The gap was only needed for its chrom name and end.  Free it after
     * the hash calls above, which still read chromName out of it.
     * NOTE(review): assumes hashAdd keeps its own copy of the key -
     * confirm. */
    if (gap != NULL)
        agpGapFree(&gap);
    }
slReverse(&chromList);
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    slReverse(&chrom->list);
verbose(1, "Got %d chroms in %s\n", slCount(chromList), lf->fileName);
lineFileClose(&lf);
hashFree(&chromHash);
return chromList;
}