struct scaffold *readScaffoldsFromAgp(char *fileName) /* Read in agp file and return as list of scaffolds. */ { struct hash *scaffoldHash = newHash(17); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[9]; int wordCount; struct scaffold *scaffoldList = NULL, *scaffold; struct agpFrag *frag; int size; for (;;) { wordCount = lineFileChop(lf, row); if (wordCount <= 0) break; if (wordCount < 8) lineFileShort(lf); if (row[4][0] == 'N' || row[4][0] == 'U') continue; if (wordCount < 9) lineFileShort(lf); frag = agpFragLoad(row); frag->chromStart -= 1; frag->fragStart -= 1; size = frag->fragEnd - frag->fragStart; if (size != frag->chromEnd - frag->chromStart) errAbort("scaffold/contig size mismatch line %d of %s", lf->lineIx, lf->fileName); if (frag->strand[0] != '+') errAbort("Strand not + line %d of %s", lf->lineIx, lf->fileName); scaffold = hashFindVal(scaffoldHash, frag->chrom); if (scaffold == NULL) { AllocVar(scaffold); hashAdd(scaffoldHash, frag->chrom, scaffold); slAddHead(&scaffoldList, scaffold); } slAddHead(&scaffold->list, frag); if (frag->chromEnd > scaffold->size) scaffold->size = frag->chromEnd; } slReverse(&scaffoldList); for (scaffold = scaffoldList; scaffold != NULL; scaffold = scaffold->next) slReverse(&scaffold->list); printf("Got %d scaffolds in %s\n", slCount(scaffoldList), lf->fileName); lineFileClose(&lf); hashFree(&scaffoldHash); return scaffoldList; }
struct chrom *readChromScaffoldsFromAgp(char *fileName) /* Read in agp file and return as list of chroms. */ { struct hash *chromHash = newHash(17); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[9]; int wordCount; struct chrom *chromList = NULL, *chrom; struct agpFrag *frag = NULL; struct agpGap *gap = NULL; char *chromName; int chromSize = 0; for (;;) { wordCount = lineFileChop(lf, row); if (wordCount <= 0) break; if (wordCount < 8) lineFileShort(lf); if (row[4][0] == 'N' || row[4][0] == 'U') { /* need to get chromEnd from gaps to determine chrom size * if the chrom ends with a gap */ gap = agpGapLoad(row); chromName = gap->chrom; chromSize = gap->chromEnd; frag = NULL; } else { if (wordCount < 9) lineFileShort(lf); frag = agpFragLoad(row); chromName = frag->chrom; chromSize = frag->chromEnd; frag->chromStart -= 1; frag->fragStart -= 1; if (frag->fragEnd - frag->fragStart != frag->chromEnd - frag->chromStart) errAbort("chrom/scaffold size mismatch line %d of %s", lf->lineIx, lf->fileName); } chrom = hashFindVal(chromHash, chromName); if (chrom == NULL) { AllocVar(chrom); slAddHead(&chromList, chrom); hashAdd(chromHash, chromName, chrom); } chrom->size = max(chromSize, chrom->size); if (frag != NULL) slAddHead(&chrom->list, frag); } slReverse(&chromList); for (chrom = chromList; chrom != NULL; chrom = chrom->next) slReverse(&chrom->list); verbose(1, "Got %d chroms in %s\n", slCount(chromList), lf->fileName); lineFileClose(&lf); hashFree(&chromHash); return chromList; }