static void goldLoad(struct track *tg) /* Load up golden path from database table to track items. */ { struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr = NULL; char **row; struct agpFrag *fragList = NULL, *frag; struct agpGap *gapList = NULL, *gap; int rowOffset; /* Get the frags and load into tg->items. */ sr = hRangeQuery(conn, "gold", chromName, winStart, winEnd, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { frag = agpFragLoad(row+rowOffset); slAddHead(&fragList, frag); } slSort(&fragList, cmpAgpFrag); sqlFreeResult(&sr); tg->items = fragList; /* Get the gaps into tg->customPt. */ sr = hRangeQuery(conn, "gap", chromName, winStart, winEnd, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { gap = agpGapLoad(row+rowOffset); slAddHead(&gapList, gap); } slReverse(&gapList); sqlFreeResult(&sr); tg->customPt = gapList; hFreeConn(&conn); }
struct agp *agpLoad(char **row, int ct) /* Load an AGP entry from array of strings. Dispose with agpFree */ { struct agp *agp; struct agpFrag *agpFrag; struct agpGap *agpGap; if (ct < 8) errAbort("Expecting >= 8 words in AGP file, got %d\n", ct); AllocVar(agp); if (row[4][0] != 'N' && row[4][0] != 'U') { /* not a gap */ if (ct != 9) errAbort("Expecting 9 words in AGP fragment line, got %d\n", ct); agpFrag = agpFragLoad(row); agp->entry = agpFrag; agp->isFrag = TRUE; } else { /* gap */ agpGap = agpGapLoad(row); agp->entry = agpGap; agp->isFrag = FALSE; } return agp; }
static void agpToFa(char *agpFile, char *agpSeq, char *faOut, char *seqDir) /* agpToFa - Convert a .agp file to a .fa file. */ { struct lineFile *lf = lineFileOpen(agpFile, TRUE); char *line, *words[16]; int lineSize, wordCount; int lastPos = 0; struct agpFrag *agpList = NULL, *agp; FILE *f = mustOpen(faOut, "w"); char *prevChrom = NULL; verbose(2,"#\tprocessing AGP file: %s\n", agpFile); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == 0 || line[0] == '#' || line[0] == '\n') continue; wordCount = chopLine(line, words); if (wordCount < 5) errAbort("Bad line %d of %s: need at least 5 words, got %d\n", lf->lineIx, lf->fileName, wordCount); if (! (sameWord("all", agpSeq) || sameWord(words[0], agpSeq))) continue; if (prevChrom != NULL && !sameString(prevChrom, words[0])) { agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f); lastPos = 0; } if (words[4][0] != 'N' && words[4][0] != 'U') { lineFileExpectAtLeast(lf, 9, wordCount); agp = agpFragLoad(words); /* file is 1-based but agpFragLoad() now assumes 0-based: */ agp->chromStart -= 1; agp->fragStart -= 1; if (agp->chromStart != lastPos) errAbort("Start doesn't match previous end line %d of %s\n", lf->lineIx, lf->fileName); if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart) errAbort("Sizes don't match in %s and %s line %d of %s\n", agp->chrom, agp->frag, lf->lineIx, lf->fileName); slAddHead(&agpList, agp); lastPos = agp->chromEnd; } else { lastPos = lineFileNeedNum(lf, words, 2); } if (prevChrom == NULL || !sameString(prevChrom, words[0])) { freeMem(prevChrom); prevChrom = cloneString(words[0]); } } agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f); }
struct scaffold *readScaffoldsFromAgp(char *fileName) /* Read in agp file and return as list of scaffolds. */ { struct hash *scaffoldHash = newHash(17); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[9]; int wordCount; struct scaffold *scaffoldList = NULL, *scaffold; struct agpFrag *frag; int size; for (;;) { wordCount = lineFileChop(lf, row); if (wordCount <= 0) break; if (wordCount < 8) lineFileShort(lf); if (row[4][0] == 'N' || row[4][0] == 'U') continue; if (wordCount < 9) lineFileShort(lf); frag = agpFragLoad(row); frag->chromStart -= 1; frag->fragStart -= 1; size = frag->fragEnd - frag->fragStart; if (size != frag->chromEnd - frag->chromStart) errAbort("scaffold/contig size mismatch line %d of %s", lf->lineIx, lf->fileName); if (frag->strand[0] != '+') errAbort("Strand not + line %d of %s", lf->lineIx, lf->fileName); scaffold = hashFindVal(scaffoldHash, frag->chrom); if (scaffold == NULL) { AllocVar(scaffold); hashAdd(scaffoldHash, frag->chrom, scaffold); slAddHead(&scaffoldList, scaffold); } slAddHead(&scaffold->list, frag); if (frag->chromEnd > scaffold->size) scaffold->size = frag->chromEnd; } slReverse(&scaffoldList); for (scaffold = scaffoldList; scaffold != NULL; scaffold = scaffold->next) slReverse(&scaffold->list); printf("Got %d scaffolds in %s\n", slCount(scaffoldList), lf->fileName); lineFileClose(&lf); hashFree(&scaffoldHash); return scaffoldList; }
struct agpFrag *loadChromAgp(struct sqlConnection *conn, char *chrom)
/* Load all AGP fragments for chromosome.
 *   conn  - open database connection to query.
 *   chrom - chromosome name; rows are read from the table "<chrom>_gold".
 * Returns the fragments as a list in the order the query returned them.
 * The query result is freed before returning. */
{
char query[256];
struct sqlResult *sr;
char **row;
struct agpFrag *fragList = NULL, *frag;

sqlSafef(query, sizeof query, "select * from %s_gold", chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    frag = agpFragLoad(row);
    slAddHead(&fragList, frag);
    }
/* Release the result set; it was previously left allocated. */
sqlFreeResult(&sr);
slReverse(&fragList);
return fragList;
}
struct agpFrag *readAgpFile(char *agpName)
/* Read agps from file.
 * Returns the fragment lines of agpName as a list in file order.  Gap lines
 * (component type 'N' or 'U' in the fifth word) are skipped.  Coordinates
 * are left exactly as agpFragLoad() produced them; no 1-based to 0-based
 * adjustment is made here.  Aborts on a line with fewer than 5 words or on
 * a fragment line that does not have exactly 9 words. */
{
struct lineFile *lf = lineFileOpen(agpName, TRUE);
int wordCount;
char *words[16];
struct agpFrag *list = NULL, *el;

while ((wordCount = lineFileChop(lf, words)) != 0)
    {
    /* words[4] is examined below, so make sure it exists. */
    lineFileExpectAtLeast(lf, 5, wordCount);
    if (words[4][0] == 'N' || words[4][0] == 'U')
	continue;
    lineFileExpectWords(lf, 9, wordCount);
    el = agpFragLoad(words);
    slAddHead(&list, el);
    }
lineFileClose(&lf);
slReverse(&list);
return list;
}
struct agpData* nextAgpEntryToSplitOn(struct lineFile *lfAgpFile, int dnaSize, struct agpData **retStartData) /* Finds the next agp entry in the agp file at which to split on. param lfAgpFile - The .agp file we are examining. param dnaSize - The total size of the chromsome's dna sequence we are splitting up. Used to prevent overrun of the algorithm that looks at agp entries. param retStartData - An out param returning the starting(inclusive) gap that we will start to split on. return struct agpData* - The ending (inclusive) agp data we are to split on. */ { int startIndex = 0; int numBasesRead = 0; char *line = NULL; char *words[9]; int lineSize = 0; struct agpGap *agpGap = NULL; struct agpFrag *agpFrag = NULL; struct agpData *curAgpData = NULL; struct agpData *prevAgpData = NULL; boolean splitPointFound = FALSE; int splitSize = _nSize; do { lineFileNext(lfAgpFile, &line, &lineSize); if (line[0] == '#' || line[0] == '\n') { continue; } AllocVar(curAgpData); curAgpData->endOfContig = FALSE; curAgpData->isGap = FALSE; curAgpData->prev = NULL; curAgpData->next = NULL; chopLine(line, words); if (words[4][0] == 'N' || words[4][0] == 'U') { agpGap = agpGapLoad(words); /* Decrement the chromStart index since that's how the agpFrags do it and we want to use 0-based addressing */ --(agpGap->chromStart); if (0 == startIndex) { startIndex = agpGap->chromStart; } if (numBasesRead >= _bSize) { splitPointFound = TRUE; } else { /* Split points are made after non-bridged contigs only here */ splitPointFound = (0 == strcasecmp(agpGap->bridge, NO)); } curAgpData->isGap = TRUE; curAgpData->data.pGap = agpGap; } else { agpFrag = agpFragLoad(words); // file is 1-based but agpFragLoad() now assumes 0-based: agpFrag->chromStart -= 1; agpFrag->fragStart -= 1; /* If we find a fragment and not a gap */ if (0 == startIndex) { startIndex = agpFrag->chromStart; } if (numBasesRead >= _aSize) { splitPointFound = TRUE; } else { splitPointFound = FALSE; } curAgpData->isGap = FALSE; 
curAgpData->data.pFrag = agpFrag; } /* Since this our first loop iteration, save the start gap as the beginning of the section to write out */ if (NULL == prevAgpData) { *retStartData = curAgpData; /* Save the pointer to the head of the list */ } else { /* Build a doubly linked list for use elsewhere */ prevAgpData->next = curAgpData; curAgpData->prev = prevAgpData; } prevAgpData = curAgpData; numBasesRead = curAgpData->data.pGap->chromEnd - startIndex; } while ((numBasesRead < splitSize || !splitPointFound) && curAgpData->data.pGap->chromEnd < dnaSize); curAgpData->next = NULL; /* Terminate the linked list */ curAgpData->endOfContig = TRUE; return curAgpData; }
static void agpSangerUnfinished(char *agpFile, char *contigFasta, char *agpOut) /* Fix agp to match unfinished contigs in fasta */ { struct lineFile *lf = lineFileOpen(agpFile, TRUE); char *line, *words[16]; int lineSize, wordCount; unsigned lastPos = 0; struct agpFrag *agp; struct agpGap *gap; FILE *f; char *lastObj = NULL; f = mustOpen(agpOut, "w"); char *newChrom = NULL; struct hash *hash = hashFasta(contigFasta); verbose(2,"#\tprocessing AGP file: %s\n", agpFile); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == 0 || line[0] == '#' || line[0] == '\n') continue; //verbose(2,"#\tline: %d\n", lf->lineIx); wordCount = chopLine(line, words); if (wordCount < 5) errAbort("Bad line %d of %s: need at least 5 words, got %d\n", lf->lineIx, lf->fileName, wordCount); if (!lastObj || !sameString(words[0],lastObj)) { freez(&newChrom); newChrom = cloneString(words[0]); lastPos = 0; } if (words[4][0] != 'N') { lineFileExpectAtLeast(lf, 9, wordCount); agp = agpFragLoad(words); /* agp is 1-based but agp loaders do not adjust for 0-based: */ agp->chromStart -= 1; agp->fragStart -= 1; if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart) errAbort("Sizes don't match in %s and %s line %d of %s\n", agp->chrom, agp->frag, lf->lineIx, lf->fileName); char *root = cloneString(agp->frag); chopSuffixAt(root, '.'); struct hashEl *e, *elist = hashLookup(hash, root); for (e = elist; e; e = hashLookupNext(e)) { struct unfinishedContig *u = e->val; if ((u->fragStart <= agp->fragStart) && (u->fragEnd >= agp->fragEnd)) { agp->frag = cloneString(u->frag); agp->fragEnd -= u->fragStart; agp->fragStart -= u->fragStart; } } freeMem(root); } else { lineFileExpectAtLeast(lf, 8, wordCount); gap = agpGapLoad(words); /* to be consistent with agpFrag */ gap->chromStart -= 1; agp = (struct agpFrag*)gap; } if (agp->chromStart != lastPos) errAbort("Start doesn't match previous end line %d of %s\n" "agp->chromStart: %u\n" "agp->chromEnd: %u\n" "lastPos: %u\n" ,lf->lineIx, lf->fileName 
,agp->chromStart ,agp->chromEnd ,lastPos ); lastPos = agp->chromEnd; freez(&lastObj); lastObj = cloneString(words[0]); /* not agp->chrom which may be modified already */ if (words[4][0] != 'N') { /* agpFragOutput assumes 0-based-half-open, but writes 1-based for agp */ agpFragOutput(agp, f, '\t', '\n'); agpFragFree(&agp); } else { /* restore back to 1-based for agp * because agpGapOutput doesn't compensate */ gap->chromStart += 1; agpGapOutput(gap, f, '\t', '\n'); agpGapFree(&gap); } } carefulClose(&f); }
static void agpMergeChromScaf(char *agpFile, char *agpOut, boolean filtering)
/* Create a combined agp file from the chrom.agp and scaffold.agp,
 *  merging in only scaffolds from scaffold.agp
 *  that are not already in chroms.
 *   agpFile   - AGP file to read.
 *   agpOut    - output file; truncated when filtering is FALSE, appended to
 *               when filtering is TRUE.
 *   filtering - FALSE: write every line and remember each fragment name
 *               (cut at its first '.') in a hash that persists between
 *               calls.  TRUE: skip lines whose object name is in that hash.
 * Every line, written or skipped, is still checked to start where the
 * previous line of the same object ended. */
{
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *line, *words[16];
int lineSize, wordCount;
unsigned lastPos = 0;
char *lastObj = NULL;
FILE *f = mustOpen(agpOut, filtering ? "a" : "w");
static struct hash *hash = NULL;

if (!hash)
    hash = hashNew(0);

verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
while (lineFileNext(lf, &line, &lineSize))
    {
    struct agpFrag *agp = NULL;
    struct agpGap *gap = NULL;
    unsigned chromStart, chromEnd;
    boolean isGap;
    boolean skipping = FALSE;

    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
	continue;
    wordCount = chopLine(line, words);
    if (wordCount < 5)
	errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
	    lf->lineIx, lf->fileName, wordCount);

    /* A new object restarts the running position. */
    if (!lastObj || !sameString(words[0],lastObj))
	lastPos = 0;

    if (filtering && hashLookup(hash, words[0]))
	skipping = TRUE;

    isGap = (words[4][0] == 'N' || words[4][0] == 'U');
    if (!isGap)
	{
	lineFileExpectAtLeast(lf, 9, wordCount);
	agp = agpFragLoad(words);
	/* agp is 1-based but agp loaders do not adjust for 0-based: */
	agp->chromStart -= 1;
	agp->fragStart  -= 1;
	if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
	    errAbort("Sizes don't match in %s and %s line %d of %s\n",
		agp->chrom, agp->frag, lf->lineIx, lf->fileName);
	if (!filtering)
	    {
	    /* Remember this fragment for a later filtering pass. */
	    char *root = cloneString(agp->frag);
	    chopSuffixAt(root, '.');
	    hashStore(hash, root);
	    freeMem(root);
	    }
	chromStart = agp->chromStart;
	chromEnd = agp->chromEnd;
	}
    else
	{
	lineFileExpectAtLeast(lf, 8, wordCount);
	gap = agpGapLoad(words);
	/* to be consistent with agpFrag */
	gap->chromStart -= 1;
	chromStart = gap->chromStart;
	chromEnd = gap->chromEnd;
	}

    /* Positions are read from the struct that was actually loaded, so a
     * gap is never accessed through a fragment pointer. */
    if (chromStart != lastPos)
	errAbort("Start doesn't match previous end line %d of %s\n"
	     "agp->chromStart: %u\n"
	     "agp->chromEnd: %u\n"
	     "lastPos: %u\n"
	    ,lf->lineIx, lf->fileName
	    ,chromStart
	    ,chromEnd
	    ,lastPos
	    );
    lastPos = chromEnd;
    freez(&lastObj);
    lastObj = cloneString(words[0]); /* not agp->chrom which may be modified already */

    if (!isGap)
	{
	/* agpFragOutput assumes 0-based-half-open, but writes 1-based for agp */
	if (!skipping)
	    agpFragOutput(agp, f, '\t', '\n');
	agpFragFree(&agp);
	}
    else
	{
	/* restore back to 1-based for agp
	 * because agpGapOutput doesn't compensate */
	gap->chromStart += 1;
	if (!skipping)
	    agpGapOutput(gap, f, '\t', '\n');
	agpGapFree(&gap);
	}
    }

freez(&lastObj);
lineFileClose(&lf);
carefulClose(&f);
}
struct hash *agpLoadAll(char *agpFile) /* load AGP entries into a hash of AGP lists, one per chromosome */ { struct hash *agpHash = newHash(0); struct lineFile *lf = lineFileOpen(agpFile, TRUE); char *words[9]; int lastPos = 0; int wordCount; struct agpFrag *agpFrag; struct agpGap *agpGap; char *chrom; struct agp *agp; struct hashEl *hel; while ((wordCount = lineFileChopNext(lf, words, ArraySize(words))) != 0) { lineFileExpectAtLeast(lf, 8, wordCount); chrom = words[0]; if (!hashFindVal(agpHash, chrom)) lastPos = 1; AllocVar(agp); if (words[4][0] != 'N' && words[4][0] != 'U') { /* not a gap */ lineFileExpectWords(lf, 9, wordCount); agpFrag = agpFragLoad(words); if (agpFrag->chromStart != lastPos) errAbort( "Frag start (%d, %d) doesn't match previous end line %d of %s\n", agpFrag->chromStart, lastPos, lf->lineIx, lf->fileName); if (agpFrag->chromEnd - agpFrag->chromStart != agpFrag->fragEnd - agpFrag->fragStart) errAbort("Sizes don't match in %s and %s line %d of %s\n", agpFrag->chrom, agpFrag->frag, lf->lineIx, lf->fileName); lastPos = agpFrag->chromEnd + 1; agp->entry = agpFrag; agp->isFrag = TRUE; } else { /* gap */ lineFileExpectWords(lf, 8, wordCount); agpGap = agpGapLoad(words); if (agpGap->chromStart != lastPos) errAbort("Gap start (%d, %d) doesn't match previous end line %d of %s\n", agpGap->chromStart, lastPos, lf->lineIx, lf->fileName); lastPos = agpGap->chromEnd + 1; agp->entry = agpGap; agp->isFrag = FALSE; } if ((hel = hashLookup(agpHash, chrom)) == NULL) hashAdd(agpHash, chrom, agp); else slAddHead(&(hel->val), agp); } #ifndef DEBUG { struct hashCookie cookie; struct hashEl *hel; cookie = hashFirst(agpHash); while ((hel = hashNext(&cookie)) != NULL) { struct agp *agpList; agpList = (struct agp *)hel->val; /* for (agp = agpList; agp != NULL; agp = agp->next) printf("isFrag: %d\n", agp->isFrag); */ } } #endif /* reverse AGP lists */ //hashTraverseVals(agpHash, slReverse); #ifndef DEBUG { struct hashCookie cookie; struct hashEl *hel; cookie = 
hashFirst(agpHash); while ((hel = hashNext(&cookie)) != NULL) { struct agp *agpList; slReverse(&hel->val); agpList = hel->val; /* agpList = (struct agp *)hel->val; slReverse(&agpList); hashRemove(agpHash, hel->name); hashAdd(agpHash, hel->name, agpList); */ /* for (agp = agpList; agp != NULL; agp = agp->next) printf("isFrag: %d\n", agp->isFrag); */ } } #endif return agpHash; }
void fakeFinContigs(char *agpName, char *faName, char *finDir, char *rootName, char *finFaDir, char *ooVer) /* fakeFinContigs - Fake up contigs for a finished chromosome. */ { struct contig *contigList = NULL, *contig = NULL; struct agpFrag *agp; struct lineFile *lf = lineFileOpen(agpName, TRUE); char *line, *words[16]; int lineSize, wordCount; int contigIx = 0; char liftDir[512], contigDir[512], path[512]; char chrom[128]; FILE *f; struct dnaSeq *seq; int fragIx; /* Build up contig list by scanning agp file. */ printf("Reading %s\n", lf->fileName); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '#' || line[0] == 0) continue; wordCount = chopLine(line, words); if (wordCount < 5) errAbort("Expecting at least 5 words line %d of %s", lf->lineIx, lf->fileName); if (words[4][0] == 'N' || words[4][0] == 'U') { contig = NULL; continue; } lineFileExpectWords(lf, 9, wordCount); agp = agpFragLoad(words); // file is 1-based but agpFragLoad() now assumes 0-based: agp->chromStart -= 1; agp->fragStart -= 1; if (contig == NULL) { AllocVar(contig); sprintf(contig->name, "%s%d", rootName, ++contigIx); contig->startOffset = agp->chromStart; slAddHead(&contigList, contig); } else { if (contig->agpList != NULL && contig->agpList->chromEnd != agp->chromStart) errAbort("Start doesn't match previous end line %d of %s", lf->lineIx, lf->fileName); } if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart) errAbort("Chrom and frag size mismatch line %d of %s", lf->lineIx, lf->fileName); slAddHead(&contig->agpList, agp); contig->endOffset = agp->chromEnd; } slReverse(&contigList); for (contig = contigList; contig != NULL; contig = contig->next) slReverse(&contig->agpList); lineFileClose(&lf); /* Load up chromosome sequence and make sure it is in one piece. */ printf("Reading %s\n", faName); seq = faReadAllDna(faName); if (slCount(seq) != 1) errAbort("Got %d sequences in %s, can only handle one.", slCount(seq), faName); /* Fix up agp coordinates. 
Make a directory for each contig. Fill it with * .fa .agp barge.NN files for that contig. */ printf("Writing contig dirs\n"); for (contig = contigList; contig != NULL; contig = contig->next) { /* Make Contig dir. */ sprintf(contigDir, "%s/%s", finDir, contig->name); makeDir(contigDir); /* Make contig.agp file. */ sprintf(path, "%s/%s.agp", contigDir, contig->name); f = mustOpen(path, "w"); fragIx = 0; for (agp = contig->agpList; agp != NULL; agp = agp->next) { char buf[128]; sprintf(buf, "%s/%s", skipChr(agp->chrom), contig->name); freez(&agp->chrom); agp->chrom = cloneString(buf); agp->chromStart -= contig->startOffset; agp->chromEnd -= contig->startOffset; agp->ix = ++fragIx; agpFragTabOut(agp, f); } carefulClose(&f); /* Make ooGreedy.NN.gl file */ sprintf(path, "%s/%s.%s.gl", contigDir, "ooGreedy", ooVer); f = mustOpen(path, "w"); for (agp = contig->agpList; agp != NULL; agp = agp->next) { if (agp->type[0] != 'N' && agp->type[0] != 'U') { fprintf(f, "%s_1\t%d\t%d\t%s\n", agp->frag, agp->chromStart, agp->chromEnd, agp->strand); } } carefulClose(&f); /* Make contig.fa file. */ sprintf(path, "%s/%s.fa", contigDir, contig->name); faWrite(path, contig->name, seq->dna + contig->startOffset, contig->endOffset - contig->startOffset); /* Make contig/barge file. */ sprintf(path, "%s/barge.%s", contigDir, ooVer); f = mustOpen(path, "w"); fprintf(f, "Barge (Connected Clone) File ooGreedy Version %s\n", ooVer); fprintf(f, "\n"); fprintf(f, "start accession size overlap maxClone maxOverlap\n"); fprintf(f, "------------------------------------------------------------\n"); for (agp = contig->agpList; agp != NULL; agp = agp->next) { char clone[128]; strcpy(clone, agp->frag); chopSuffix(clone); fprintf(f, "%d\t%s\t%d\t100\tn/a\t0\n", agp->chromStart, clone, agp->chromEnd); } carefulClose(&f); /* Make contig/gold file. 
*/ sprintf(path, "%s/gold.%s", contigDir, ooVer); f = mustOpen(path, "w"); fragIx = 0; for (agp = contig->agpList; agp != NULL; agp = agp->next) { char fragName[128]; struct agpFrag frag = *agp; sprintf(fragName, "%s_1", agp->frag); frag.frag = fragName; frag.type[0] = '0'; agpFragTabOut(&frag, f); } carefulClose(&f); } /* Create lift subdirectory. */ printf("Creating lift files\n"); sprintf(liftDir, "%s/lift", finDir); makeDir(liftDir); /* Create lift/oOut.lst file (just a list of contigs). */ sprintf(path, "%s/oOut.lst", liftDir); f = mustOpen(path, "w"); for (contig = contigList; contig != NULL; contig = contig->next) fprintf(f, "%s/%s.fa.out\n", contig->name, contig->name); carefulClose(&f); /* Create lift/ordered.lst file (just a list of contigs). */ sprintf(path, "%s/ordered.lst", liftDir); f = mustOpen(path, "w"); for (contig = contigList; contig != NULL; contig = contig->next) fprintf(f, "%s\n", contig->name); carefulClose(&f); /* Create lift/ordered.lft file. */ sprintf(path, "%s/ordered.lft", liftDir); f = mustOpen(path, "w"); splitPath(faName, NULL, chrom, NULL); for (contig = contigList; contig != NULL; contig = contig->next) fprintf(f, "%d\t%s/%s\t%d\t%s\t%d\n", contig->startOffset, skipChr(chrom), contig->name, contig->endOffset - contig->startOffset, chrom, seq->size); carefulClose(&f); }
struct chrom *readChromScaffoldsFromAgp(char *fileName) /* Read in agp file and return as list of chroms. */ { struct hash *chromHash = newHash(17); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[9]; int wordCount; struct chrom *chromList = NULL, *chrom; struct agpFrag *frag = NULL; struct agpGap *gap = NULL; char *chromName; int chromSize = 0; for (;;) { wordCount = lineFileChop(lf, row); if (wordCount <= 0) break; if (wordCount < 8) lineFileShort(lf); if (row[4][0] == 'N' || row[4][0] == 'U') { /* need to get chromEnd from gaps to determine chrom size * if the chrom ends with a gap */ gap = agpGapLoad(row); chromName = gap->chrom; chromSize = gap->chromEnd; frag = NULL; } else { if (wordCount < 9) lineFileShort(lf); frag = agpFragLoad(row); chromName = frag->chrom; chromSize = frag->chromEnd; frag->chromStart -= 1; frag->fragStart -= 1; if (frag->fragEnd - frag->fragStart != frag->chromEnd - frag->chromStart) errAbort("chrom/scaffold size mismatch line %d of %s", lf->lineIx, lf->fileName); } chrom = hashFindVal(chromHash, chromName); if (chrom == NULL) { AllocVar(chrom); slAddHead(&chromList, chrom); hashAdd(chromHash, chromName, chrom); } chrom->size = max(chromSize, chrom->size); if (frag != NULL) slAddHead(&chrom->list, frag); } slReverse(&chromList); for (chrom = chromList; chrom != NULL; chrom = chrom->next) slReverse(&chrom->list); verbose(1, "Got %d chroms in %s\n", slCount(chromList), lf->fileName); lineFileClose(&lf); hashFree(&chromHash); return chromList; }