void featureBits(char *database, int tableCount, char *tables[]) /* featureBits - Correlate tables via bitmap projections and booleans. */ { struct sqlConnection *conn = NULL; char *bedName = optionVal("bed", NULL), *faName = optionVal("fa", NULL); char *binName = optionVal("bin", NULL); char *bedRegionInName = optionVal("bedRegionIn", NULL); char *bedRegionOutName = optionVal("bedRegionOut", NULL); FILE *bedFile = NULL, *faFile = NULL, *binFile = NULL; FILE *bedRegionOutFile = NULL; struct bed *bedRegionList = NULL; boolean faIndependent = FALSE; struct chromInfo *cInfo; if (bedName) bedFile = mustOpen(bedName, "w"); if (binName) binFile = mustOpen(binName, "w"); if ((bedRegionInName && !bedRegionOutName) || (!bedRegionInName && bedRegionOutName)) errAbort("bedRegionIn and bedRegionOut must both be specified"); if (faName) { boolean faMerge = optionExists("faMerge"); faFile = mustOpen(faName, "w"); if (tableCount > 1) { if (!faMerge) errAbort("For fa output of multiple tables you must use the " "faMerge option"); } faIndependent = (!faMerge); } if (chromSizes != NULL) chromInfoList = chromInfoLoadAll(chromSizes); else chromInfoList = fbCreateChromInfoList(clChrom, database); if (!countGaps) conn = hAllocConn(database); checkInputExists(conn, database, chromInfoList, tableCount, tables); if (!faIndependent) { double totalBases = 0, totalBits = 0; int firstTableBits = 0, secondTableBits = 0; int *pFirstTableBits = NULL, *pSecondTableBits = NULL; double totalFirstBits = 0, totalSecondBits = 0; static int dotClock = 1; if (calcEnrichment) { pFirstTableBits = &firstTableBits; pSecondTableBits = &secondTableBits; } if (bedRegionInName) { struct lineFile *lf = lineFileOpen(bedRegionInName, TRUE); struct bed *bed; char *row[3]; bedRegionOutFile = mustOpen(bedRegionOutName, "w"); while (lineFileRow(lf, row)) { if (startsWith(row[0],"#")||startsWith(row[0],"chrom")) continue; bed = bedLoad3(row); slAddHead(&bedRegionList, bed); } lineFileClose(&lf); slReverse(&bedRegionList); } for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next) { if (inclChrom(cInfo->chrom)) { int chromBitSize; int chromSize = cInfo->size; verbose(3,"chromFeatureBits(%s)\n", cInfo->chrom); chromFeatureBits(conn, database, cInfo->chrom, tableCount, tables, bedFile, faFile, binFile, bedRegionList, bedRegionOutFile, chromSize, &chromBitSize, pFirstTableBits, pSecondTableBits ); totalBases += countBases(conn, cInfo->chrom, chromSize, database); totalBits += chromBitSize; totalFirstBits += firstTableBits; totalSecondBits += secondTableBits; if (dots > 0) { if (--dotClock <= 0) { fputc('.', stdout); fflush(stdout); dotClock = dots; } } } } if (dots > 0) { fputc('\n', stdout); fflush(stdout); } if (calcEnrichment) fprintf(stderr,"%s %5.3f%%, %s %5.3f%%, both %5.3f%%, cover %4.2f%%, enrich %4.2fx\n", tables[0], 100.0 * totalFirstBits/totalBases, tables[1], 100.0 * totalSecondBits/totalBases, 100.0 * totalBits/totalBases, 100.0 * totalBits / totalFirstBits, (totalBits/totalSecondBits) / (totalFirstBits/totalBases) ); else fprintf(stderr,"%1.0f bases of %1.0f (%4.3f%%) in intersection\n", totalBits, totalBases, 100.0*totalBits/totalBases); } else { int totalItems = 0; double totalBases = 0; int itemCount, baseCount; for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next) { if (inclChrom(cInfo->chrom)) { chromFeatureSeq(conn, database, cInfo->chrom, tables[0], bedFile, faFile, &itemCount, &baseCount); totalBases += countBases(conn, cInfo->chrom, baseCount, database); totalItems += itemCount; } } } hFreeConn(&conn); }
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack, struct hash *otherHash, struct stats *stats) /* Process one chromosome. */ { struct bed *bedList = NULL, *bed; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct psl *pslList = NULL; struct dnaSeq *chromSeq = NULL; if (endsWith(bedTrack, ".bed")) { struct lineFile *lf = lineFileOpen(bedTrack, TRUE); char *row[3]; while (lineFileRow(lf, row)) { if (sameString(chrom, row[0])) { bed = bedLoad3(row); slAddHead(&bedList, bed); } } lineFileClose(&lf); } else { sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { bed = bedLoad3(row+rowOffset); slAddHead(&bedList, bed); } sqlFreeResult(&sr); } slReverse(&bedList); uglyf("Loaded beds\n"); sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row + rowOffset); slAddHead(&pslList, psl); binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); } sqlFreeResult(&sr); uglyf("Loaded psls\n"); chromSeq = hLoadChrom(database, chrom); /* Fetch entire chromosome into memory. */ uglyf("Loaded human seq\n"); for (bed = bedList; bed != NULL; bed = bed->next) { struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd); for (el = list; el != NULL; el = el->next) { struct psl *fullPsl = el->val; struct psl *psl = pslTrimToTargetRange(fullPsl, bed->chromStart, bed->chromEnd); if (psl != NULL) { foldPslIntoStats(psl, chromSeq, otherHash, stats); pslFree(&psl); } } slFreeList(&list); stats->bedCount += 1; stats->bedBaseCount += bed->chromEnd - bed->chromStart; sqlFreeResult(&sr); } freeDnaSeq(&chromSeq); pslFreeList(&pslList); binKeeperFree(&bk); hFreeConn(&conn); }