void getChromSizes(char *database, struct hash **retHash, struct chromSizes **retList) /* Return hash of chromSizes. Also calculates size without * gaps. */ { struct sqlConnection *conn = hAllocConn(database); struct chromInfo *ci, *ciList = getAllChromInfo(database); struct sqlResult *sr; char **row; struct chromSizes *cs, *csList = NULL; struct hash *hash = newHash(8); int rowOffset; for (ci = ciList; ci != NULL; ci = ci->next) { AllocVar(cs); hashAddSaveName(hash, ci->chrom, cs, &cs->name); slAddHead(&csList, cs); cs->totalSize = ci->size; sr = hChromQuery(conn, "gold", ci->chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct agpFrag frag; agpFragStaticLoad(row + rowOffset, &frag); cs->seqSize += frag.chromEnd - frag.chromStart; } sqlFreeResult(&sr); } hFreeConn(&conn); slReverse(&csList); *retHash = hash; *retList = csList; }
void readPatch(char *fileName, struct hash *cloneHash, struct ntContig **retNtList, struct hash **retNtHash) /* Read nt.agp file into clone/hash. */ { struct ntContig *ntList = NULL, *nt = NULL; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[9]; struct agpFrag frag; struct clone *clone, *ntClone, *lastClone = NULL; struct cloneRef *ref; struct hash *ntHash = newHash(0); char cloneName[128]; char fragName[128]; char c; int ntOrder = 0; while (lineFileRow(lf, row)) { agpFragStaticLoad(row, &frag); // file is 1-based but agpFragLoad() now assumes 0-based: frag.chromStart -= 1; frag.fragStart -= 1; if (nt == NULL || !sameString(frag.chrom, nt->name)) { AllocVar(nt); slAddHead(&ntList, nt); if (hashLookup(ntHash, frag.chrom) != NULL) errAbort("NT contig %s repeated line %d of %s", row[0], lf->lineIx, lf->fileName); hashAddSaveName(ntHash, frag.chrom, nt, &nt->name); lastClone = NULL; ntOrder = 0; } strcpy(cloneName, frag.frag); chopSuffix(cloneName); clone = hashMustFindVal(cloneHash, cloneName); clone->ntStart = frag.chromStart; clone->ntEnd = frag.chromEnd; if (clone->nt != NULL) { warn("Clone %s trying to be in two NT contigs (%s and %s) line %d of %s", clone->name, clone->nt->name, nt->name, lf->lineIx, lf->fileName); nt->problem = TRUE; } clone->nt = nt; c = frag.strand[0]; if (c == '-') clone->ntOrientation = -1; else if (c == '+') clone->ntOrientation = +1; else errAbort("Expecting +1 or -1 field 5, line %d, file %s", lf->lineIx, lf->fileName); c = frag.type[0]; if (c == 'F' || c == 'D' || c == 'P') clone->seqType = c; else errAbort("Expecting F, D, or P field 6, line %d, file %s", lf->lineIx, lf->fileName); sprintf(fragName, "%s_1", frag.frag); clone->fragName = cloneString(fragName); clone->goldStart = frag.fragStart; clone->goldEnd = frag.fragEnd; clone->ntOrder = ntOrder++; /* Add ref to NT. */ AllocVar(ref); ref->ref = clone; slAddTail(&nt->cloneList, ref); /* Do a few tests. */ if (clone->goldStart >= clone->goldEnd) { warn("Clone %s end before start (%d before %d) line %d of %s", clone->name, clone->goldStart, clone->goldEnd, lf->lineIx, lf->fileName); nt->problem = TRUE; } if (clone->ntStart >= clone->ntEnd) { warn("Clone %s NT end before NT start line %d of %s", clone->name, lf->lineIx, lf->fileName); nt->problem = TRUE; } if (clone->goldEnd > clone->size) { if (sameString(clone->startFrag, clone->endFrag)) { warn("Clone %s end position %d, clone size %d, line %d of %s", clone->name, clone->goldEnd, clone->size, lf->lineIx, lf->fileName); nt->problem = TRUE; } } if (clone->ntEnd - clone->ntStart != clone->goldEnd - clone->goldStart) { warn("Size not the same in NT contig as in clone %s (%d vs %d) line %d of %s", clone->name, clone->ntEnd - clone->ntStart, clone->goldEnd-clone->goldStart, lf->lineIx, lf->fileName); nt->problem = TRUE; } nt->sumSize += clone->goldEnd - clone->goldStart; ntClone = hashFindVal(cloneHash, nt->name); if (ntClone != NULL && clone->ntEnd > ntClone->size) { warn("Clone %s NT end position %d, NT size %d, line %d of %s", clone->name, clone->ntEnd, ntClone->size, lf->lineIx, lf->fileName); nt->problem = TRUE; } if (ntClone != NULL) nt->size = ntClone->size; else nt->size = clone->size; /* This happens for single-clone NT contigs only. */ if (lastClone != NULL) { if (lastClone->ntEnd != clone->ntStart) { warn("last clone (%s)'s end doesn't match with current clone (%s)'s start line %d of %s", lastClone->name, clone->name, lf->lineIx, lf->fileName); } } lastClone = clone; } lineFileClose(&lf); slReverse(&ntList); for (nt = ntList; nt != NULL; nt = nt->next) { if (nt->sumSize != nt->size) { warn("Sum of fragments of %s is %d, but size is supposed to be %d", nt->name, nt->sumSize, nt->size); nt->problem = TRUE; } } *retNtList = ntList; *retNtHash = ntHash; }
void splitAgp(char *agpName, char *goldFileName, char *gapFileName) /* Split up agp file into gold and gap files. */ { struct lineFile *lf; char *words[16]; int wordCount; FILE *goldTab, *gapTab; /* Scan through .agp file splitting it into gold * and gap components. */ goldTab = mustOpen(goldFileName, "w"); gapTab = mustOpen(gapFileName, "w"); lf = lineFileOpen(agpName, TRUE); while ((wordCount = lineFileChop(lf, words)) > 0) { int start, end; if (wordCount < 5) errAbort("Short line %d of %s", lf->lineIx, lf->fileName); int len = strlen(words[0]); if (len > maxChromNameSize) { maxChromNameSize = len; if (maxChromNameSize > 254) errAbort("ERROR: chrom name size is over 254(%d) characters: " "'%s'", maxChromNameSize, words[0]); } start = sqlUnsigned(words[1])-1; end = sqlUnsigned(words[2]); if (words[4][0] == 'N' || words[4][0] == 'U') { struct agpGap gap; agpGapStaticLoad(words, &gap); gap.chromStart -= 1; fprintf(gapTab, "%u\t", hFindBin(start, end)); agpGapTabOut(&gap, gapTab); verbose(3,"#GAP\t%s:%d-%d\n", gap.chrom, gap.chromStart, gap.chromEnd); } else { struct agpFrag gold; agpFragStaticLoad(words, &gold); agpFragValidate(&gold); len = strlen(words[5]); if (len > maxFragNameSize) { maxFragNameSize = len; if (maxFragNameSize > 254) errAbort("ERROR: fragment name size is over 254(%d) " "characters: '%s'", maxFragNameSize, words[5]); } // file is 1-based. agpFragLoad() now assumes 0-based. // and agpFragTabOut() will assume 1-based, but we will load // the generated file straight into the database, so // subtract 2: gold.chromStart -= 2; gold.fragStart -= 2; fprintf(goldTab, "%u\t", hFindBin(start, end)); agpFragTabOut(&gold, goldTab); } } lineFileClose(&lf); carefulClose(&goldTab); carefulClose(&gapTab); }