void doOneChrom(char *database, char *chrom, char *rnaTable, char *expTable, FILE *f) /* Process one chromosome. */ { int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; struct bed *exp, *rna; int rowOffset; struct binElement *be, *beList; int oneCount; /* Load up expTable into bin-keeper. */ sr = hChromQuery(conn, expTable, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { exp = bedLoadN(row + rowOffset, 12); binKeeperAdd(bk, exp->chromStart, exp->chromEnd, exp); } sqlFreeResult(&sr); /* Loop through rnaTable and look at intersections. */ sr = hChromQuery(conn, rnaTable, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { rna = bedLoadN(row + rowOffset, 12); beList = binKeeperFind(bk, rna->chromStart, rna->chromEnd); oneCount = 0; for (be = beList; be != NULL; be = be->next) { exp = be->val; if (exp->strand[0] == rna->strand[0]) { ++oneCount; ++hitCount; // fprintf(f, "%s:%d-%d\t%s\t%s\n", // rna->chrom, rna->chromStart, rna->chromEnd, rna->name, exp->name); } } slFreeList(&beList); if (oneCount == 0) { ++missCount; fprintf(f, "miss %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); } else if (oneCount == 1) { fprintf(f, "uniq %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); ++uniqCount; } else { fprintf(f, "dupe %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); ++dupeCount; } } sqlFreeResult(&sr); hFreeConn(&conn); }
void getChromSizes(char *database, struct hash **retHash, struct chromSizes **retList) /* Return hash of chromSizes. Also calculates size without * gaps. */ { struct sqlConnection *conn = hAllocConn(database); struct chromInfo *ci, *ciList = getAllChromInfo(database); struct sqlResult *sr; char **row; struct chromSizes *cs, *csList = NULL; struct hash *hash = newHash(8); int rowOffset; for (ci = ciList; ci != NULL; ci = ci->next) { AllocVar(cs); hashAddSaveName(hash, ci->chrom, cs, &cs->name); slAddHead(&csList, cs); cs->totalSize = ci->size; sr = hChromQuery(conn, "gold", ci->chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct agpFrag frag; agpFragStaticLoad(row + rowOffset, &frag); cs->seqSize += frag.chromEnd - frag.chromStart; } sqlFreeResult(&sr); } hFreeConn(&conn); slReverse(&csList); *retHash = hash; *retList = csList; }
void scanChromTable(struct sqlConnection *conn, char *chrom, char *table)
/* Query all rows of table on chrom and read through every one of them,
 * discarding the data. */
{
int rowOffset;
struct sqlResult *sr = hChromQuery(conn, table, chrom, NULL, &rowOffset);

/* Pull rows until the result set is exhausted; nothing is kept. */
for (;;)
    {
    if (sqlNextRow(sr) == NULL)
        break;
    }
sqlFreeResult(&sr);
}
void bestProbeOverlap(struct sqlConnection *conn, char *probeTable, struct genePred *gpList, struct hash *gpToProbeHash) /* Create hash of most overlapping probe if any for each gene. Require * at least 100 base overlap. */ { /* Create a hash of binKeepers filled with probes. */ struct hash *keeperHash = keepersForChroms(conn); struct hashCookie it = hashFirst(keeperHash); struct hashEl *hel; int pslCount = 0; while ((hel = hashNext(&it)) != NULL) { char *chrom = hel->name; struct binKeeper *bk = hel->val; int rowOffset; struct sqlResult *sr = hChromQuery(conn, probeTable, chrom, NULL, &rowOffset); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row+rowOffset); binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); ++pslCount; } sqlFreeResult(&sr); } verbose(2, "Loaded %d psls from %s\n", pslCount, probeTable); /* Loop through gene list, finding best probe if any for each gene. */ struct genePred *gp; for (gp = gpList; gp != NULL; gp = gp->next) { struct rbTree *rangeTree = genePredToRangeTree(gp, FALSE); struct psl *bestPsl = NULL; int bestOverlap = 99; /* MinOverlap - 1 */ struct binKeeper *bk = hashMustFindVal(keeperHash, gp->chrom); struct binElement *bin, *binList = binKeeperFind(bk, gp->txStart, gp->txEnd); for (bin = binList; bin != NULL; bin = bin->next) { struct psl *psl = bin->val; if (psl->strand[0] == gp->strand[0]) { int overlap = pslRangeTreeOverlap(psl, rangeTree); if (overlap > bestOverlap) { bestOverlap = overlap; bestPsl = psl; } } } if (bestPsl != NULL) hashAdd(gpToProbeHash, gp->name, bestPsl->qName); } }
void restrictGaps(char *database, UBYTE *cov, int size, char *chrom)
/* Mark gaps as off-limits.  For every row of the "gap" table on chrom the
 * bytes cov[start..end) are set to 'restricted'.
 *   database - database to open a connection to
 *   cov      - per-base coverage array, 'size' bytes long
 *   size     - number of entries in cov
 *   chrom    - chromosome whose gaps are applied */
{
int rowOffset;
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
char **row;
int s,e;
while ((row = sqlNextRow(sr)) != NULL)
    {
    s = sqlUnsigned(row[1+rowOffset]);
    e = sqlUnsigned(row[2+rowOffset]);
    /* Debug-build sanity checks on the table contents. */
    assert(s >= 0);
    assert(e <= size);
    /* The asserts disappear under NDEBUG, so also bound the write at run
     * time: clamp the range to the array and skip empty or inverted ranges,
     * where e - s would turn into an enormous memset length. */
    if (s < 0)
        s = 0;
    if (e > size)
        e = size;
    if (e > s)
        memset(cov + s, restricted, e - s);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
struct pslReader *pslReaderChromQuery(struct sqlConnection* conn,
                                      char* table, char* chrom,
                                      char* extraWhere)
/* Create a new pslReader to read all rows for a chrom in a database table.
 * If extraWhere is not null, it is added as an additional where condition.  It
 * will determine if pslx columns are in the table.
 * The returned reader is always allocated; its sr field is NULL when the
 * query produced no result. */
{
struct pslReader* pr;
/* Start from 0 so the consistency check below never reads an indeterminate
 * value should hChromQuery return NULL without storing an offset. */
int rowOffset = 0;

AllocVar(pr);
pr->table = cloneString(table);

/* non-existent table will return null */
pr->sr = hChromQuery(conn, table, chrom, extraWhere, &rowOffset);
if (pr->sr != NULL)
    getTableInfo(pr);

/* The offset recorded in the reader must agree with the one the query
 * reported. */
assert(pr->rowOffset == rowOffset);
return pr;
}
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom) /* Return a tree of ranges for sequence gaps in chromosome */ { struct rbTree *tree = rbTreeNew(simpleRangeCmp); int rowOffset; struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct agpGap gap; struct simpleRange *range; agpGapStaticLoad(row+rowOffset, &gap); lmAllocVar(tree->lm, range); range->start = gap.chromStart; range->end = gap.chromEnd; rbTreeAdd(tree, range); } sqlFreeResult(&sr); return tree; }
struct genePred *loadGenePred(char *database, char *chrom, char *track, struct binKeeper *bk) /* Load in a gene prediction track to bk. */ { struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; struct genePred *list = NULL, *el; sr = hChromQuery(conn, track, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { el = genePredLoad(row + rowOffset); binKeeperAdd(bk, el->txStart, el->txEnd, el); slAddHead(&list, el); } sqlFreeResult(&sr); hFreeConn(&conn); slReverse(&list); return list; }
int countBases(struct sqlConnection *conn, char *chrom, int chromSize, char *database) /* Count bases, generally not including gaps, in chromosome. */ { static boolean gapsLoaded = FALSE; struct sqlResult *sr; int totalGaps = 0; char **row; int rowOffset; if (countGaps) return chromSize; /* If doing all chroms, then load up all the gaps and be done with * it instead of re-reading the gap table for every chrom */ if (sameWord(clChrom,"all")) { if (!gapsLoaded) gapHash = loadAllGaps(conn, database); gapsLoaded = TRUE; totalGaps = hashIntValDefault(gapHash, chrom, 0); } else { sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { int gapSize; struct agpGap gap; agpGapStaticLoad(row+rowOffset, &gap); gapSize = gap.chromEnd - gap.chromStart; totalGaps += gapSize; } sqlFreeResult(&sr); } return chromSize - totalGaps; }
void loadPslsFromDatabase(struct sqlConnection *conn, char *db, char *chrom) /** Load all of the desired alignments into the chromkeeper structure from the desired pslTables. */ { int i = 0; struct sqlResult *sr = NULL; char **row = NULL; int rowOffset = 0; struct psl *pslList = NULL, *psl = NULL; for(i = 0; i < numDbTables; i++) { sr = hChromQuery(conn, dbTables[i], chrom, NULL, &rowOffset); while((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row+rowOffset); slAddHead(&pslList, psl); minPslStart = min(psl->tStart, minPslStart); maxPslEnd = max(psl->tEnd, maxPslEnd); /* This just adds the mrna twice to the list, cheat way to add more weight to certain tables. */ if(weightMrna && (stringIn("refSeqAli", dbTables[i]) || stringIn("mrna", dbTables[i]))) { psl = clonePsl(psl); slAddHead(&pslList, psl); } } sqlFreeResult(&sr); } chromPslBin = binKeeperNew(minPslStart, maxPslEnd); agxSeenBin = binKeeperNew(minPslStart, maxPslEnd); for(psl = pslList; psl != NULL; psl = psl->next) { binKeeperAdd(chromPslBin, psl->tStart, psl->tEnd, psl); } }
static struct hash *loadAllGaps(struct sqlConnection *conn, char *db) /* working on all chroms, fetch all per-chrom gap counts at once * returns hash by chrom name to gap counts for that chrom */ { struct chromInfo *cInfo; struct sqlResult *sr; char **row; struct hash *ret; int totalGapSize = 0; int gapCount = 0; ret = newHash(0); /* If not split, read in whole gulp, create per-chrom hash of sizes */ if (hTableExists(db, "gap")) { char *prevChrom = NULL; int totalGapsThisChrom = 0; sr = sqlGetResult(conn, NOSQLINJ "select chrom,chromStart,chromEnd from gap order by chrom"); while ((row = sqlNextRow(sr)) != NULL) { int gapSize = sqlUnsigned(row[2]) - sqlUnsigned(row[1]); ++gapCount; if (prevChrom && sameWord(prevChrom,row[0])) { totalGapsThisChrom += gapSize; totalGapSize += gapSize; } else { if (prevChrom) { hashAddInt(ret, prevChrom, totalGapsThisChrom); freeMem(prevChrom); prevChrom = cloneString(row[0]); totalGapsThisChrom = gapSize; totalGapSize += gapSize; } else { prevChrom = cloneString(row[0]); totalGapsThisChrom = gapSize; totalGapSize += gapSize; } } } /* and the last one */ if (prevChrom && (totalGapsThisChrom > 0)) { hashAddInt(ret, prevChrom, totalGapsThisChrom); freeMem(prevChrom); } sqlFreeResult(&sr); } else { /* for each chrom name, fetch the gap count */ for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next) { int rowOffset; int totalGapsThisChrom = 0; sr = hChromQuery(conn, "gap", cInfo->chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { int gapSize; struct agpGap gap; ++gapCount; agpGapStaticLoad(row+rowOffset, &gap); gapSize = gap.chromEnd - gap.chromStart; totalGapsThisChrom += gapSize; totalGapSize += gapSize; } sqlFreeResult(&sr); hashAddInt(ret, cInfo->chrom, totalGapsThisChrom); } } verbose(2,"#\tloaded %d gaps covering %d bases\n", gapCount, totalGapSize); return ret; }
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack, struct hash *otherHash, struct stats *stats) /* Process one chromosome. */ { struct bed *bedList = NULL, *bed; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct psl *pslList = NULL; struct dnaSeq *chromSeq = NULL; if (endsWith(bedTrack, ".bed")) { struct lineFile *lf = lineFileOpen(bedTrack, TRUE); char *row[3]; while (lineFileRow(lf, row)) { if (sameString(chrom, row[0])) { bed = bedLoad3(row); slAddHead(&bedList, bed); } } lineFileClose(&lf); } else { sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { bed = bedLoad3(row+rowOffset); slAddHead(&bedList, bed); } sqlFreeResult(&sr); } slReverse(&bedList); uglyf("Loaded beds\n"); sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row + rowOffset); slAddHead(&pslList, psl); binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); } sqlFreeResult(&sr); uglyf("Loaded psls\n"); chromSeq = hLoadChrom(database, chrom); /* Fetch entire chromosome into memory. */ uglyf("Loaded human seq\n"); for (bed = bedList; bed != NULL; bed = bed->next) { struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd); for (el = list; el != NULL; el = el->next) { struct psl *fullPsl = el->val; struct psl *psl = pslTrimToTargetRange(fullPsl, bed->chromStart, bed->chromEnd); if (psl != NULL) { foldPslIntoStats(psl, chromSeq, otherHash, stats); pslFree(&psl); } } slFreeList(&list); stats->bedCount += 1; stats->bedBaseCount += bed->chromEnd - bed->chromStart; sqlFreeResult(&sr); } freeDnaSeq(&chromSeq); pslFreeList(&pslList); binKeeperFree(&bk); hFreeConn(&conn); }