Esempio n. 1
0
struct scaffold *readScaffoldsFromAgp(char *fileName)
/* Read in agp file and return as list of scaffolds. */
{
struct hash *scaffoldHash = newHash(17);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[9];
int wordCount;
struct scaffold *scaffoldList = NULL, *scaffold;
struct agpFrag *frag;
int size;

for (;;)
    {
    wordCount = lineFileChop(lf, row);
    if (wordCount <= 0)
        break;
    if (wordCount < 8)
	lineFileShort(lf);
    if (row[4][0] == 'N' || row[4][0] == 'U')
        continue;
    if (wordCount < 9)
        lineFileShort(lf);
    frag = agpFragLoad(row);
    frag->chromStart -= 1;
    frag->fragStart -= 1;
    size = frag->fragEnd - frag->fragStart;
    if (size != frag->chromEnd - frag->chromStart)
        errAbort("scaffold/contig size mismatch line %d of %s", lf->lineIx, lf->fileName);
    if (frag->strand[0] != '+')
        errAbort("Strand not + line %d of %s", lf->lineIx, lf->fileName);
    scaffold = hashFindVal(scaffoldHash, frag->chrom);
    if (scaffold == NULL)
        {
	AllocVar(scaffold);
	hashAdd(scaffoldHash, frag->chrom, scaffold);
	slAddHead(&scaffoldList, scaffold);
	}
    slAddHead(&scaffold->list, frag);
    if (frag->chromEnd > scaffold->size)
        scaffold->size = frag->chromEnd;
    }
slReverse(&scaffoldList);
for (scaffold = scaffoldList; scaffold != NULL; scaffold = scaffold->next)
    slReverse(&scaffold->list);
printf("Got %d scaffolds in %s\n", slCount(scaffoldList), lf->fileName);
lineFileClose(&lf);
hashFree(&scaffoldHash);
return scaffoldList;
}
struct chrom *readChromScaffoldsFromAgp(char *fileName)
/* Read in agp file and return as list of chroms. */
{
struct hash *chromHash = newHash(17);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[9];
int wordCount;
struct chrom *chromList = NULL, *chrom;
struct agpFrag *frag = NULL;
struct agpGap *gap = NULL;
char *chromName;
int chromSize = 0;

for (;;)
    {
    wordCount = lineFileChop(lf, row);
    if (wordCount <= 0)
        break;
    if (wordCount < 8)
	lineFileShort(lf);

    if (row[4][0] == 'N' || row[4][0] == 'U')
        {
        /* need to get chromEnd from gaps to determine chrom size
         * if the chrom ends with a gap */
        gap = agpGapLoad(row);
        chromName = gap->chrom;
        chromSize = gap->chromEnd;
        frag = NULL;
        }
    else
        {
        if (wordCount < 9)
            lineFileShort(lf);
        frag = agpFragLoad(row);
        chromName = frag->chrom;
        chromSize = frag->chromEnd;
        frag->chromStart -= 1;
        frag->fragStart -= 1;
        if (frag->fragEnd - frag->fragStart != 
            frag->chromEnd - frag->chromStart)
                errAbort("chrom/scaffold size mismatch line %d of %s",
                                  lf->lineIx, lf->fileName);
        }
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        AllocVar(chrom);
        slAddHead(&chromList, chrom);
        hashAdd(chromHash, chromName, chrom);
        }
    chrom->size = max(chromSize, chrom->size);
    if (frag != NULL)
        slAddHead(&chrom->list, frag);
    }
slReverse(&chromList);
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    slReverse(&chrom->list);
verbose(1, "Got %d chroms in %s\n", slCount(chromList), lf->fileName);
lineFileClose(&lf);
hashFree(&chromHash);
return chromList;
}