void countChromWindows(char *database, struct scoredWindow *winList, FILE *f) /* Go through winList and count up how many hit each chromosome. */ { struct chromCounts *ccList = NULL, *cc; struct hash *hash = newHash(8); struct scoredWindow *win; for (win = winList; win != NULL; win = win->next) { char *chrom = win->chrom; cc = hashFindVal(hash, chrom); if (cc == NULL) { AllocVar(cc); slAddHead(&ccList, cc); hashAddSaveName(hash, chrom, cc, &cc->name); cc->chromSize = hChromSize(database, chrom); } cc->count += 1; } fprintf(f, "Finished window count per chromosome:\n"); for (cc = ccList; cc != NULL; cc = cc->next) { fprintf(f, "%s\t%d\t%5.2f%%\n", cc->name, cc->count, 100.0 * cc->count * bigStepSize / cc->chromSize); } fprintf(f, "\n"); slFreeList(&ccList); hashFree(&hash); }
void doOneChrom(char *database, char *chrom, char *rnaTable, char *expTable, FILE *f) /* Process one chromosome. */ { int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; struct bed *exp, *rna; int rowOffset; struct binElement *be, *beList; int oneCount; /* Load up expTable into bin-keeper. */ sr = hChromQuery(conn, expTable, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { exp = bedLoadN(row + rowOffset, 12); binKeeperAdd(bk, exp->chromStart, exp->chromEnd, exp); } sqlFreeResult(&sr); /* Loop through rnaTable and look at intersections. */ sr = hChromQuery(conn, rnaTable, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { rna = bedLoadN(row + rowOffset, 12); beList = binKeeperFind(bk, rna->chromStart, rna->chromEnd); oneCount = 0; for (be = beList; be != NULL; be = be->next) { exp = be->val; if (exp->strand[0] == rna->strand[0]) { ++oneCount; ++hitCount; // fprintf(f, "%s:%d-%d\t%s\t%s\n", // rna->chrom, rna->chromStart, rna->chromEnd, rna->name, exp->name); } } slFreeList(&beList); if (oneCount == 0) { ++missCount; fprintf(f, "miss %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); } else if (oneCount == 1) { fprintf(f, "uniq %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); ++uniqCount; } else { fprintf(f, "dupe %s:%d-%d %c %s\n", rna->chrom, rna->chromStart, rna->chromEnd, rna->strand[0], rna->name); ++dupeCount; } } sqlFreeResult(&sr); hFreeConn(&conn); }
static void loadCytoBandsIdeo(struct track *tg) /* Load up cytoBandIdeo from database table to track items. */ { if (tg->isBigBed) { struct lm *lm = lmInit(0); int start = 0; int end = hChromSize(database, chromName); struct bigBedInterval *bb, *bbList = bigBedSelectRange(tg, chromName, start, end, lm); char *bedRow[32]; char startBuf[16], endBuf[16]; for (bb = bbList; bb != NULL; bb = bb->next) { bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, ArraySize(bedRow)); struct cytoBand *bed = cytoBandLoad(bedRow); slAddHead(&tg->items, bed); } slReverse(&tg->items); lmCleanup(&lm); return; } char query[256]; sqlSafef(query, sizeof(query), "select * from cytoBandIdeo where chrom like '%s'", chromName); if(hTableExists(database, "cytoBandIdeo")) bedLoadItemByQuery(tg, "cytoBandIdeo", query, (ItemLoader)cytoBandLoad); if(slCount(tg->items) == 0) { tg->limitedVisSet = TRUE; tg->limitedVis = tvHide; } }
void oneTrackInit() /* Set up global variables using cart settings and initialize libs. */ { hPrintDisable(); database = cartUsualString(cart, "db", "ce4"); hSetDb(database); organism = hOrganism(database); withLeftLabels = FALSE; /* Left labels are not supported. */ withCenterLabels = cartUsualBoolean(cart, "centerLabels", FALSE); withGuidelines = cartUsualBoolean(cart, "guidelines", FALSE); revCmplDisp = cartUsualBooleanDb(cart, database, REV_CMPL_DISP, FALSE); position = cartUsualString(cart, "position", "chrI:4001-5000"); hgParseChromRange(position, &chromName, &winStart, &winEnd); insideX = 0; /* Left labels are not supported. */ insideWidth = cartUsualInt(cart, "pix", 640); leftLabelX = 0; leftLabelWidth = 0; /* Left labels are not supported. */ winBaseCount = winEnd - winStart; basesPerPixel = ((float)winBaseCount) / ((float)insideWidth); zoomedToCdsColorLevel = (winBaseCount <= insideWidth*3); seqBaseCount = hChromSize(database, chromName); initTl(); zoomedToBaseLevel = (winBaseCount <= insideWidth / tl.mWidth); zoomedToCodonLevel = (ceil(winBaseCount/3) * tl.mWidth) <= insideWidth; createHgFindMatchHash(); }
static boolean illegalCoordinate(char *db, char *chrom, int start, int end, char *line, int lineIx, struct dyString *dyWarn)
/* Check that start and end are legal for this chrom.  On the first
 * violation found (negative start, end past the chromosome size, or
 * start greater than end) a message is appended to dyWarn and TRUE is
 * returned; otherwise FALSE is returned.  start == end is accepted. */
{
int chromSize = hChromSize(db, chrom);
boolean isIllegal = TRUE;

if (start < 0)
    dyStringPrintf(dyWarn, "line %d: '%s': chromStart (%d) less than zero\n",
                   lineIx, line, start);
else if (end > chromSize)
    dyStringPrintf(dyWarn, "line %d: '%s': chromEnd (%d) greater than chrom length (%s:%d)\n",
                   lineIx, line, end, chrom, chromSize);
else if (start > end)
    dyStringPrintf(dyWarn, "line %d: '%s': chromStart (%d) greater than chromEnd (%d)\n",
                   lineIx, line, start, end);
else
    isIllegal = FALSE;
return isIllegal;
}
void getBinKeeper(char *chromName) /* put SNPs in binKeeper */ { char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; int start = 0; int end = 0; char *rsId = NULL; int chromSize = hChromSize(chromName); verbose(1, "constructing binKeeper...\n"); snps = binKeeperNew(0, chromSize); safef(query, sizeof(query), "select chromStart, chromEnd, name from %s where chrom = '%s'", snpTable, chromName); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { start = sqlUnsigned(row[0]); end = sqlUnsigned(row[1]); rsId = cloneString(row[2]); binKeeperAdd(snps, start, end, rsId); } sqlFreeResult(&sr); hFreeConn(&conn); }
static void checkChromCoords(char *db, char *name, char *chrom, int start, int end) /* check that bounds of a PAR are valid */ { if (start >= end) errAbort("zero or negative PAR length: %s %s:%d-%d", name, chrom, start, end); if ((start < 0) || (end > hChromSize(db, chrom))) errAbort(" PAR out of chromosome bounds: %s %s:%d-%d", name, chrom, start, end); }
int hgSeqChromSize(char *db, char *chromName)
/* Return the size of chromName when database db exists,
 * otherwise return 0. */
{
return hDbExists(db) ? hChromSize(db, chromName) : 0;
}
struct dnaSeq *getSkinnySeq(char *sequenceFile, char *chromName) /* mark deletions with '-' */ { char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; struct dnaSeq *seq; char *seqPtr = NULL; int pos = 0; int start = 0; int end = 0; int chromSize = 0; int snpCount = 0; char *snpChrom = NULL; char *rsId = NULL; verbose(1, "sequence file = %s\n", sequenceFile); verbose(1, "chrom = %s\n", chromName); chromSize = hChromSize(chromName); verbose(1, "chromSize = %d\n", chromSize); seq = hFetchSeq(sequenceFile, chromName, 0, chromSize); // seq = hLoadChrom(chromName); touppers(seq->dna); seqPtr = seq->dna; sqlSafef(query, sizeof(query), "select chrom, chromStart, chromEnd, name from %s", snpTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { snpChrom = cloneString(row[0]); start = sqlUnsigned(row[1]); end = sqlUnsigned(row[2]); rsId = cloneString(row[3]); if (!sameString(snpChrom, chromName)) continue; assert (end < chromSize); assert (end > start); snpCount++; for (pos = start; pos < end; pos++) seqPtr[pos] = '-'; } sqlFreeResult(&sr); hFreeConn(&conn); if (snpCount == 0) verbose(1, "no matching SNPs\n"); return seq; }
void intersectOnChrom(char *db, struct sqlConnection *conn, char *chrom, char *track1, char *track2) /* Do intersection on one chromosome. */ { int chromSize = hChromSize(chrom); struct lm *lm = lmInit(0); struct bed *bedList1, *bedList2, *andBed; struct featureBits *fb1, *fb2; Bits *bit1, *bit2; int fieldCount1, fieldCount2; struct binKeeper *bk2; uglyTime(NULL); scanChromTable(conn, chrom, track1); scanChromTable(conn, chrom, track2); uglyTime("Scan tracks"); bedList1 = getChromAsBed(conn, db, track1, chrom, lm, &fieldCount1); bedList2 = getChromAsBed(conn, db, track2, chrom, lm, &fieldCount2); uglyTime("Tracks as bed"); uglyf("%d items with %d fields in %s, ", slCount(bedList1), fieldCount1, track1); uglyf("%d items with %d fields in %s\n", slCount(bedList2), fieldCount2, track2); bit1 = bitAlloc(chromSize+8); bit2 = bitAlloc(chromSize+8); uglyTime("bitAlloc"); fb1 = fbList(db, chrom, track1, bedList1, chromSize); fb2 = fbList(db, chrom, track1, bedList1, chromSize); uglyTime("bed to featureBits list"); fbOrBits(bit1, chromSize, fb1, 0); fbOrBits(bit2, chromSize, fb2, 0); uglyTime("or into bits"); bitAnd(bit1, bit2, chromSize); uglyTime("Anding bitfields"); andBed = bitsToBed4List(bit1, chromSize, chrom, 0, 0, chromSize, lm); uglyTime("Converting bitfield to bed 4"); bitCountAllOverlaps(bedList1, bit2, fieldCount2); uglyTime("Counting overlaps in track1 with bitfield of track2"); bk2 = fbToBinKeeper(fb2, chromSize); uglyTime("Adding featureBits list from track 2 into binKeeper."); bkCountAllOverlaps(bedList1, bk2, fieldCount2); uglyTime("Count overlaps in track1 with binKeeper of track2"); featureBitsFreeList(&fb1); featureBitsFreeList(&fb2); uglyTime("free featureBits"); bitFree(&bit1); bitFree(&bit2); uglyTime("bitFree"); }
void whyConserved(char *database, char *chrom, char *homologyTrack)
/* whyConserved - Try and analyse why a particular thing is conserved.
 * Builds a bitmap of homologyTrack on chrom, prints a table header, then
 * calls explainSome() first for the homology track itself and then for a
 * fixed series of track specifications, in order. */
{
/* Track specifications examined after the homology track, in order. */
static char *trackSpecs[] =
    {
    "simpleRepeat",
    "rmsk",
    "sanger22:CDS:10",
    "refGene:CDS:10",
    "sanger22:exon:10",
    "refGene:exon:10",
    "ensGene:exon:10",
    "rnaGene",
    "mrna:exon:10",
    "intronEst:exon:10",
    "xenoMrna:exon:10",
    "xenoEst:exon:10",
    "genscan:exon:10",
    "genscanSubopt",
    "psu:exon:10",
    "sanger22:upstream:200",
    "refGene:upstream:200",
    "mrna:upstream:200",
    "est",
    };
int specCount = sizeof(trackSpecs) / sizeof(trackSpecs[0]);
int i;
struct sqlConnection *conn;
int chromSize;
Bits *homo = NULL;
Bits *bits = NULL;
Bits *once = NULL;

hSetDb(database);
conn = hAllocConn();
chromSize = hChromSize(chrom);
homo = bitAlloc(chromSize);
bits = bitAlloc(chromSize);
once = bitAlloc(chromSize);

/* Get homology bitmap and set once mask to be the same. */
fbOrTableBits(homo, homologyTrack, chrom, chromSize, conn);
bitOr(once, homo, chromSize);

/* printHeader */
printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n",
       "Track Specification", "track", "overlap", "track", "mus", "track", "new", "cum");
printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n",
       "", "size", "size", "geno", "track", "mus", "mus", "mus");
printf("-----------------------------------------------------------------------------\n");

/* Whittle away at homology... */
explainSome(homo, once, bits, chrom, chromSize, conn, NULL, homologyTrack);
for (i = 0; i < specCount; ++i)
    explainSome(homo, once, bits, chrom, chromSize, conn, trackSpecs[i], NULL);

/* Release the three chromosome-sized bitmaps. */
bitFree(&homo);
bitFree(&bits);
bitFree(&once);
hFreeConn(&conn);
}
static struct bed *bedTable2(struct sqlConnection *conn, struct region *region, char *table2) /* get a bed list, possibly complement, for table2 */ { /* This use of bedTable rather than a bitmap is not really working. The * rest of the table browser does intersection at the exon level, while * the wig code, which this is part of, does it at the gene level. I * noticed it while working on the corresponding routines for bigWig, * which I'm building to work with bitmaps at the exon level. I'm not * sure it's worth fixing this code since nobody has complained, and we're * probably going to be doing mostly bigWig rather than wig in the future. * -JK */ boolean invTable2 = cartCgiUsualBoolean(cart, hgtaInvertTable2, FALSE); char *op = cartString(cart, hgtaIntersectOp); struct bed *bedList = NULL; struct lm *lm1 = lmInit(64*1024); /* fetch table 2 as a bed list */ bedList = getFilteredBeds(conn, table2, region, lm1, NULL); /* If table 2 bed list needs to be complemented (!table2), then do so */ if (invTable2 || sameString("none", op)) { unsigned chromStart = 0; /* start == end == 0 */ unsigned chromEnd = 0; /* means do full chrom */ unsigned chromSize = hChromSize(database, region->chrom); struct lm *lm2 = lmInit(64*1024); struct bed *inverseBedList = NULL; /* new list */ if ((region->start != 0) || (region->end != 0)) { chromStart = region->start; chromEnd = region->end; } if ((struct bed *)NULL == bedList) { if (0 == region->end) chromEnd = chromSize; addBedElement(&inverseBedList, region->chrom, chromStart, chromEnd, 1, lm2); } else inverseBedList=invertBedList(bedList, lm2, region->chrom, chromStart, chromEnd, chromSize); lmCleanup(&lm1); /* == bedFreeList(&bedList) */ return inverseBedList; } else return bedList; }
static void addPadToBed3(struct bed3 *bedList, int atStart, int atEnd)
/* Shift chromStart by atStart and chromEnd by atEnd for every item of
 * bedList, clamping the start at 0 and the end at the chromosome size. */
{
struct bed3 *item = bedList;

while (item != NULL)
    {
    int paddedStart = item->chromStart + atStart;
    int paddedEnd = item->chromEnd + atEnd;
    int chromSize = hChromSize(database, item->chrom);

    item->chromStart = (paddedStart < 0) ? 0 : paddedStart;
    item->chromEnd = (paddedEnd > chromSize) ? chromSize : paddedEnd;
    item = item->next;
    }
}
static struct mafAli *mafOrAxtLoadInRegion2(struct sqlConnection *conn,struct sqlConnection *conn2, struct trackDb *tdb, char *chrom, int start, int end, char *axtOtherDb, char *file)
/* Load alignments for chrom:start-end of tdb->table as a mafAli list.
 * When axtOtherDb is NULL the data is loaded with mafLoadInRegion2 using
 * file; otherwise it is loaded with axtLoadAsMafInRegion, with axtOtherDb
 * supplying the query sequence sizes. */
{
struct hash *qSizeHash;
struct mafAli *mafList;
int tSize;

if (axtOtherDb == NULL)
    return mafLoadInRegion2(conn, conn2, tdb->table, chrom, start, end, file);

qSizeHash = hChromSizeHash(axtOtherDb);
tSize = hChromSize(database, chrom);
mafList = axtLoadAsMafInRegion(conn, tdb->table, chrom, start, end,
                               database, axtOtherDb, tSize, qSizeHash);
hashFree(&qSizeHash);
return mafList;
}
void chromFeatureSeq(struct sqlConnection *conn, char *database, char *chrom, char *trackSpec, FILE *bedFile, FILE *faFile, int *retItemCount, int *retBaseCount) /* Write out sequence file for features from one chromosome. * This separate routine handles the non-merged case. It's * reason for being is so that the feature names get preserved. */ { boolean hasBin; char t[512], *s = NULL; char table[HDB_MAX_TABLE_STRING]; struct featureBits *fbList = NULL, *fb; if (trackSpec[0] == '!') errAbort("Sorry, '!' not available with fa output unless you use faMerge"); isolateTrackPartOfSpec(trackSpec, t); s = strchr(t, '.'); if (s != NULL) errAbort("Sorry, only database (not file) tracks allowed with " "fa output unless you use faMerge"); // ignore isSplit return from hFindSplitTable() (void) hFindSplitTable(database, chrom, t, table, &hasBin); fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom), where, TRUE, TRUE); for (fb = fbList; fb != NULL; fb = fb->next) { int s = fb->start, e = fb->end; if (bedFile != NULL) { fprintf(bedFile, "%s\t%d\t%d\t%s", fb->chrom, fb->start, fb->end, fb->name); if (fb->strand != '?') fprintf(bedFile, "\t0\t%c", fb->strand); fprintf(bedFile, "\n"); } if (faFile != NULL) { struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower); if (fb->strand == '-') reverseComplement(seq->dna, seq->size); faWriteNext(faFile, fb->name, seq->dna, seq->size); freeDnaSeq(&seq); } } featureBitsFreeList(&fbList); }
void chromKeeperInit(char *db) /* Initialize the chromKeeper to a given database (hg15,mm2, etc). */ { struct slName *names = NULL, *name = NULL; int count=0; names = hAllChromNames(db); chromCount = slCount(names); assert(chromNames == NULL && chromRanges == NULL); AllocArray(chromNames, chromCount); AllocArray(chromRanges, chromCount); for(name=names; name != NULL; name = name->next) { int size = hChromSize(db, name->name); chromRanges[count] = binKeeperNew(0,size); chromNames[count] = cloneString(name->name); count++; } slFreeList(&names); }
void writeOutJobs(char *fastaFile, int size, int oligoSize, char *outDir)
/* Write the file "parasol.spec" with one nmerAlign job line per chunk of
 * every chromosome in the global chromNames list.  Chunks are chunkSize
 * bases long and each one starts 25 bases before the end of the previous
 * one; the last chunk of a chromosome runs to the chromosome end.
 * The size parameter is not used by this function. */
{
struct slName *chromName = NULL;
FILE *out = mustOpen("parasol.spec", "w");

for(chromName = chromNames; chromName != NULL; chromName = chromName->next)
    {
    int chromSize = hChromSize(chromName->name);
    int mark = 0;

    /* Full-size chunks. */
    while(mark + chunkSize < chromSize)
        {
        fprintf(out, "nmerAlign %d %d %s%s.nib %s %s/%s:%d-%d.nmer nmerSize=%d\n",
                mark, mark+chunkSize, dirName, chromName->name, fastaFile,
                outDir, chromName->name, mark, mark+chunkSize, oligoSize);
        mark += chunkSize - 25;
        }
    /* Final chunk up to the end of the chromosome. */
    fprintf(out, "nmerAlign %d %d %s%s.nib %s %s/%s:%d-%d.nmer nmerSize=%d\n",
            mark, chromSize , dirName, chromName->name, fastaFile,
            outDir, chromName->name, mark, chromSize, oligoSize);
    }
/* Close the spec file so the handle is released and buffered output is written. */
carefulClose(&out);
}
void trackOverlap(char *database, char *chrom, char *homologyTrack, char *specFile) /* trackOverlap - Correlate a track with a series of tracks specified in specFile. */ { struct lineFile *lf = NULL; char *line = NULL; struct sqlConnection *conn; int chromSize; Bits *h**o = NULL; Bits *bits = NULL; Bits *once = NULL; lf = lineFileOpen(specFile, TRUE); conn = hAllocConn(database); chromSize = hChromSize(database, chrom); h**o = bitAlloc(chromSize); bits = bitAlloc(chromSize); once = bitAlloc(chromSize); /* Get homology bitmap and set once mask to be the same. */ fbOrTableBits(database, h**o, homologyTrack, chrom, chromSize, conn); bitOr(once, h**o, chromSize); /* printHeader */ printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n", "Track Specification", "track", "overlap", "track", "cov", "track", "new", "cum"); printf("%-21s %8s %8s %5s %6s %6s %5s %5s \n", "", "size", "size", "geno", "track", "cov", "cov", "cov"); printf("-----------------------------------------------------------------------------\n"); /* Whittle awway at homology... */ explainSome(database, h**o, once, bits, chrom, chromSize, conn, NULL, homologyTrack); while(lineFileNextReal(lf, &line)) { explainSome(database, h**o, once, bits, chrom, chromSize, conn, line, NULL); } lineFileClose(&lf); hFreeConn(&conn); }
static boolean illegalCoordinate(char *chrom, int start, int end)
/* Check that start and end are legal for this chrom.  Issues a warning
 * and returns TRUE on the first problem found (negative start, end past
 * the chromosome size, or start not strictly less than end); returns
 * FALSE when the coordinates are acceptable. */
{
int chromSize = hChromSize(database, chrom);
boolean isIllegal = TRUE;

if (start < 0)
    warn("chromStart (%d) less than zero", start);
else if (end > chromSize)
    warn("chromEnd (%d) greater than chrom length (%s:%d)", end, chrom, chromSize);
else if (start >= end)
    warn("chromStart (%d) must be less than chromEnd (%s:%d)", start, chrom, end);
else
    isIllegal = FALSE;
return isIllegal;
}
void statsOnSpan(char *database, struct sqlConnection *conn, struct region *r, char *axtBestDir, struct stats *stats, FILE *f, struct scoredWindow **pWinList) /* Gather region info on one chromosome/region. */ { char *chrom = r->chrom; int chromSize = hChromSize(database, chrom); Bits *maskBits = bitAlloc(chromSize); Bits *aliBits = bitAlloc(chromSize); Bits *matchBits = bitAlloc(chromSize); Bits *geneBits = bitAlloc(chromSize); /* Set up aliBits and matchBits for to be turned on * where bases align, and where bases align and match. * Zero both bitmaps in areas that are transcribed. */ setAliBits(axtBestDir, chrom, chromSize, aliBits, matchBits); maskFeatures(database, conn, chrom, chromSize, maskBits); bitNot(maskBits, chromSize); bitAnd(aliBits, maskBits, chromSize); bitAnd(matchBits, maskBits, chromSize); /* Set up maskBits to have 0's on gaps in genome */ bitClear(maskBits, chromSize); fbOrTableBits(database, maskBits, "gap", chrom, chromSize, conn); bitNot(maskBits, chromSize); /* Set up bitmap for Ensemble or mRNA. */ fbOrTableBits(database, geneBits, "ensGene", chrom, chromSize, conn); fbOrTableBits(database, geneBits, "mrna", chrom, chromSize, conn); /* Calculate various stats on windows. */ addToStats(stats, aliBits, matchBits, geneBits, maskBits, r, f, pWinList); /* Cleanup */ bitFree(&geneBits); bitFree(&maskBits); bitFree(&aliBits); bitFree(&matchBits); }
void mafSplitPos(char *database, char *size, char *outFile) /* Pick best positions for split close to size. * Use middle of a gap as preferred site. * If not gaps are in range, use recent repeats (0% diverged) */ { int splitSize = 0; int chromSize = 0; struct hash *chromHash; struct hashCookie hc; struct hashEl *hel; struct sqlConnection *conn = sqlConnect(database); FILE *f; db = database; verbose(1, "Finding split positions for %s at ~%s Mbp intervals\n", database, size); splitSize = sqlSigned(size) * 1000000; if (chrom == NULL) { chromHash = hChromSizeHash(database); } else { chromHash = hashNew(6); hashAddInt(chromHash, chrom, hChromSize(database, chrom)); } conn = sqlConnect(database); f = mustOpen(outFile, "w"); hc = hashFirst(chromHash); while ((hel = hashNext(&hc)) != NULL) { chrom = hel->name; chromSize = ptToInt(hel->val); chromSplits(chrom, chromSize, splitSize, conn, f); } sqlDisconnect(&conn); carefulClose(&f); }
void eisenInput(char *database, char *outFile) /* eisenInput - Create input for Eisen-style cluster program. */ { struct slName *chromList = NULL, *chromEl; FILE *f = mustOpen(outFile, "w"); char *chrom; struct hash *refLinkHash = hashNew(0); struct refLink *refLinkList; struct hash *erHash = hashNew(0); struct expRecord *erList = NULL, *er; /* Load info good for all chromosomes. */ refLinkList = loadRefLink(database, refLinkHash); erList = loadExpRecord(expRecordTable, "hgFixed"); for (er = erList; er != NULL; er = er->next) { char sid[16]; snprintf(sid, sizeof(sid), "%u", er->id); hashAdd(erHash, sid, er); } /* Do it chromosome by chromosome. */ chromList = hAllChromNames(database); for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next) { chrom = chromEl->name; uglyf("%s\n", chrom); oneChromInput(database, chrom, hChromSize(database, chrom), "rnaCluster", expTrack, refLinkHash, erHash, f); } /* Cleanup time! */ expRecordFreeList(&erList); freeHash(&erHash); refLinkFreeList(&refLinkList); freeHash(&refLinkHash); }
void axtListReverse(struct axt **axtList, char *queryDb)
/* Reverse complement an entire axtList: both symbol strings of every
 * alignment are reverse-complemented, the query coordinates are flipped
 * relative to the query sequence size, and finally the list order is
 * reversed.  Target coordinates are not altered here. */
{
struct axt *el = *axtList;

while (el != NULL)
    {
    /* For items named "gap" the item's own qEnd is used as the size;
     * otherwise the size comes from queryDb. */
    int qSize = sameString(el->qName , "gap") ? el->qEnd : hChromSize(queryDb, el->qName);
    int flippedStart, flippedEnd;

    reverseComplement(el->qSym, el->symCount);
    reverseComplement(el->tSym, el->symCount);

    flippedEnd = qSize - el->qStart;
    flippedStart = qSize - el->qEnd;
    el->qStart = flippedStart;
    el->qEnd = flippedEnd;

    el = el->next;
    }
slReverse(axtList);
}
void ultraPcrRegions(char *database, char *bedFile, char *outFa) /* ultraPcrRegions - Get regions to PCR up and some surrounding sequence. */ { int extraSize = 1000; FILE *f = mustOpen(outFa, "w"); struct bed *bed, *bedList = bedLoadNAll(bedFile, 4); hSetDb(database); for (bed = bedList; bed != NULL; bed = bed->next) { int bedSize = bed->chromEnd - bed->chromStart; int chromSize = hChromSize(bed->chrom); int seqSize; int seqStart = bed->chromStart - extraSize; int seqEnd = bed->chromEnd + extraSize; int firstParenPos, secondParenPos; struct dyString *dy; char fileName[512]; struct dnaSeq *seq; if (seqStart < 0) seqStart = 0; if (seqEnd > chromSize) seqEnd = chromSize; seqSize = seqEnd - seqStart; firstParenPos = bed->chromStart - seqStart; secondParenPos = firstParenPos + bedSize; seq = hChromSeqMixed(bed->chrom, seqStart, seqEnd); dy = dyStringNew(seqSize+2); dyStringAppendN(dy, seq->dna, firstParenPos); dyStringAppendC(dy, '('); dyStringAppendN(dy, seq->dna+firstParenPos, secondParenPos-firstParenPos); dyStringAppendC(dy, ')'); dyStringAppendN(dy, seq->dna+secondParenPos, seqSize - secondParenPos); faWriteNext(f, bed->name, dy->string, dy->stringSize); } carefulClose(&f); }
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack, struct hash *otherHash, struct stats *stats) /* Process one chromosome. */ { struct bed *bedList = NULL, *bed; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct psl *pslList = NULL; struct dnaSeq *chromSeq = NULL; if (endsWith(bedTrack, ".bed")) { struct lineFile *lf = lineFileOpen(bedTrack, TRUE); char *row[3]; while (lineFileRow(lf, row)) { if (sameString(chrom, row[0])) { bed = bedLoad3(row); slAddHead(&bedList, bed); } } lineFileClose(&lf); } else { sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { bed = bedLoad3(row+rowOffset); slAddHead(&bedList, bed); } sqlFreeResult(&sr); } slReverse(&bedList); uglyf("Loaded beds\n"); sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row + rowOffset); slAddHead(&pslList, psl); binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); } sqlFreeResult(&sr); uglyf("Loaded psls\n"); chromSeq = hLoadChrom(database, chrom); /* Fetch entire chromosome into memory. */ uglyf("Loaded human seq\n"); for (bed = bedList; bed != NULL; bed = bed->next) { struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd); for (el = list; el != NULL; el = el->next) { struct psl *fullPsl = el->val; struct psl *psl = pslTrimToTargetRange(fullPsl, bed->chromStart, bed->chromEnd); if (psl != NULL) { foldPslIntoStats(psl, chromSeq, otherHash, stats); pslFree(&psl); } } slFreeList(&list); stats->bedCount += 1; stats->bedBaseCount += bed->chromEnd - bed->chromStart; sqlFreeResult(&sr); } freeDnaSeq(&chromSeq); pslFreeList(&pslList); binKeeperFree(&bk); hFreeConn(&conn); }
static struct bed *intersectOnRegion( struct sqlConnection *conn, /* Open connection to database. */ struct region *region, /* Region to work inside */ char *table1, /* Table input list is from. */ struct bed *bedList1, /* List before intersection, should be * all within region. */ struct lm *lm, /* Local memory pool. */ int *retFieldCount) /* Field count. */ /* Intersect bed list, consulting CGI vars to figure out * with what table and how. Return intersected result, * which is independent from input. This potentially will * chew up bedList1. */ { /* Grab parameters for intersection from cart. */ double moreThresh = cartCgiUsualDouble(cart, hgtaMoreThreshold, 0); double lessThresh = cartCgiUsualDouble(cart, hgtaLessThreshold, 100); boolean invTable = cartCgiUsualBoolean(cart, hgtaInvertTable, FALSE); char *op = intersectOp(); /* --- TODO MIKE - replace bedList2, bits2 with baseMask stuff. */ /* Load up intersecting bedList2 (to intersect with) */ int chromSize = hChromSize(database, region->chrom); boolean isBpWise = (sameString("and", op) || sameString("or", op)); Bits *bits2 = bitsForIntersectingTable(conn, region, chromSize, isBpWise); /* Set up some other local vars. */ struct hTableInfo *hti1 = getHti(database, table1, conn); struct bed *intersectedBedList = NULL; /* Produce intersectedBedList. */ if (isBpWise) { /* --- TODO MIKE - replace, bits1 with baseMask stuff. 
*/ /* Base-pair-wise operation: get bitmap for primary table too */ Bits *bits1 = bitAlloc(chromSize+8); boolean hasBlocks = hti1->hasBlocks; if (retFieldCount != NULL && (*retFieldCount < 12)) hasBlocks = FALSE; bedOrBits(bits1, chromSize, bedList1, hasBlocks, 0); /* invert inputs if necessary */ if (invTable) bitNot(bits1, chromSize); /* do the intersection/union */ if (sameString("and", op)) bitAnd(bits1, bits2, chromSize); else bitOr(bits1, bits2, chromSize); /* clip to region if necessary: */ if (region->start > 0) bitClearRange(bits1, 0, region->start); if (region->end < chromSize) bitClearRange(bits1, region->end, (chromSize - region->end)); /* translate back to bed */ intersectedBedList = bitsToBed4List(bits1, chromSize, region->chrom, 1, region->start, region->end, lm); if (retFieldCount != NULL) *retFieldCount = 4; bitFree(&bits1); } else intersectedBedList = filterBedByOverlap(bedList1, hti1->hasBlocks, op, moreThresh, lessThresh, bits2, chromSize); bitFree(&bits2); return intersectedBedList; }
struct bed *getRegionAsMergedBed( char *db, char *table, /* Database and table. */ struct region *region, /* Region to get data for. */ char *filter, /* Filter to add to SQL where clause if any. */ struct hash *idHash, /* Restrict to id's in this hash if non-NULL. */ struct lm *lm, /* Where to allocate memory. */ int *retFieldCount) /* Number of fields. */ /* Return a bed list of all items in the given range in subtrack-merged table. * Cleanup result via lmCleanup(&lm) rather than bedFreeList. */ { if (! anySubtrackMerge(db, table)) return getRegionAsBed(db, table, region, filter, idHash, lm, retFieldCount); else { struct hTableInfo *hti = getHtiOnDb(database, table); int chromSize = hChromSize(database, region->chrom); Bits *bits1 = NULL; Bits *bits2 = NULL; struct bed *bedMerged = NULL; struct trackDb *subtrack = NULL; char *primaryType = findTypeForTable(database,curTrack,table, ctLookupName); char *op = cartString(cart, hgtaSubtrackMergeOp); boolean isBpWise = (sameString(op, "and") || sameString(op, "or")); double moreThresh = cartDouble(cart, hgtaSubtrackMergeMoreThreshold); double lessThresh = cartDouble(cart, hgtaSubtrackMergeLessThreshold); boolean firstTime = TRUE; if (sameString(op, "cat")) { struct bed *bedList = getRegionAsBed(db, table, region, filter, idHash, lm, retFieldCount); struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(curTrack->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { subtrack = tdbRef->val; if (! 
sameString(curTable, subtrack->table) && isSubtrackMerged(subtrack->table) && sameString(subtrack->type, primaryType)) { struct bed *bedList2 = getRegionAsBed(db, subtrack->table, region, NULL, idHash, lm, retFieldCount); bedList = slCat(bedList, bedList2); } } slFreeList(&tdbRefList); return bedList; } bits1 = bitAlloc(chromSize+8); bits2 = bitAlloc(chromSize+8); /* If doing a base-pair-wise operation, then start with the primary * subtrack's ranges in bits1, and AND/OR all the selected subtracks' * ranges into bits1. If doing a non-bp-wise intersection, then * start with all bits clear in bits1, and then OR selected subtracks' * ranges into bits1. */ if (isBpWise) { struct lm *lm2 = lmInit(64*1024); struct bed *bedList1 = getRegionAsBed(db, table, region, filter, idHash, lm2, retFieldCount); bedOrBits(bits1, chromSize, bedList1, hti->hasBlocks, 0); lmCleanup(&lm2); } struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(curTrack->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { subtrack = tdbRef->val; if (! 
sameString(curTable, subtrack->table) && isSubtrackMerged(subtrack->table) && sameString(subtrack->type, primaryType)) { struct hTableInfo *hti2 = getHtiOnDb(database, subtrack->table); struct lm *lm2 = lmInit(64*1024); struct bed *bedList2 = getRegionAsBed(db, subtrack->table, region, NULL, idHash, lm2, NULL); if (firstTime) firstTime = FALSE; else bitClear(bits2, chromSize); bedOrBits(bits2, chromSize, bedList2, hti2->hasBlocks, 0); if (sameString(op, "and")) bitAnd(bits1, bits2, chromSize); else bitOr(bits1, bits2, chromSize); lmCleanup(&lm2); } } slFreeList(&tdbRefList); if (isBpWise) { bedMerged = bitsToBed4List(bits1, chromSize, region->chrom, 1, region->start, region->end, lm); if (retFieldCount != NULL) *retFieldCount = 4; } else { struct bed *bedList1 = getRegionAsBed(db, table, region, filter, idHash, lm, retFieldCount); bedMerged = filterBedByOverlap(bedList1, hti->hasBlocks, op, moreThresh, lessThresh, bits1, chromSize); } bitFree(&bits1); bitFree(&bits2); return bedMerged; } }
void sortGenes(struct sqlConnection *conn) /* Put up sort gene page. */ { cartWebStart(cart, database, "Finding Candidate Genes for Gene Sorter"); if (!hgNearOk(database)) errAbort("Sorry, gene sorter not available for this database."); /* Get list of regions. */ struct genoGraph *gg = ggFirstVisible(); double threshold = getThreshold(); struct bed3 *bed, *bedList = regionsOverThreshold(gg); /* Figure out what table and column are the sorter's main gene set. */ struct hash *genomeRa = hgReadRa(genome, database, "hgNearData", "genome.ra", NULL); char *geneTable = hashMustFindVal(genomeRa, "geneTable"); char *idColumn = hashMustFindVal(genomeRa, "idColumn"); /* if marker labels were present when the file was uploaded, they are saved here */ char cgmName[256]; safef(cgmName, sizeof(cgmName), "%s.cgm", gg->binFileName); struct lineFile *m = lineFileMayOpen(cgmName, TRUE); char *cgmRow[4]; cgmRow[0] = ""; /* dummy row */ cgmRow[1] = ""; cgmRow[2] = "0"; cgmRow[3] = "0"; FILE *g = NULL; int markerCount = 0; struct tempName snpTn; if (m) { /* Create custom column output file. */ trashDirFile(&snpTn, "hgg", "marker", ".mrk"); g = mustOpen(snpTn.forCgi, "w"); fprintf(g, "column name=\"%s Markers\" shortLabel=\"%s Markers over threshold\" longLabel=\"%s Markers in regions over threshold\" " "visibility=on priority=99 " "\n" , gg->shortLabel , gg->shortLabel , gg->shortLabel ); } /*** Build up hash of all transcriptHash that are in region. */ struct hash *transcriptHash = hashNew(16); /* This loop handles one chromosome at a time. It depends on * the bedList being sorted by chromosome. */ for (bed = bedList; bed != NULL; ) { /* Make binKeeper and stuff in all regions in this chromosome into it. 
*/ char *chrom = bed->chrom; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); while (bed != NULL && sameString(chrom, bed->chrom)) { binKeeperAdd(bk, bed->chromStart, bed->chromEnd, bed); bed = bed->next; } struct binKeeper *bkGenes = NULL; if (m) bkGenes = binKeeperNew(0, chromSize); /* Query database to find out bounds of all genes on this chromosome * and if they overlap any of the regions then put them in the hash. */ char query[512]; safef(query, sizeof(query), "select name,txStart,txEnd from %s where chrom='%s'", geneTable, chrom); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; int start = sqlUnsigned(row[1]); int end = sqlUnsigned(row[2]); if (binKeeperAnyOverlap(bk, start, end)) { hashStore(transcriptHash, name); if (m) binKeeperAdd(bkGenes, start, end, cloneString(name)); } } sqlFreeResult(&sr); if (m) { /* Read cgm file if it exists, looking at all markers on this chromosome * and if they overlap any of the regions and genes then output them. */ do { // marker, chrom, chromStart, val char *marker = cgmRow[0]; char *chr = cgmRow[1]; int start = sqlUnsigned(cgmRow[2]); int end = start+1; double val = sqlDouble(cgmRow[3]); int cmp = strcmp(chr,chrom); if (cmp > 0) break; if (cmp == 0) { if (val >= threshold) { struct binElement *el, *bkList = binKeeperFind(bkGenes, start, end); for (el = bkList; el; el=el->next) { /* output to custom column trash file */ fprintf(g, "%s %s\n", (char *)el->val, marker); } if (bkList) { ++markerCount; slFreeList(&bkList); } } } } while (lineFileRow(m, cgmRow)); } /* Clean up for this chromosome. */ binKeeperFree(&bk); if (m) { /* For speed, we do not free up the values (cloned the kg names earlier) */ binKeeperFree(&bkGenes); } } /* Get list of all transcripts in regions. */ struct hashEl *el, *list = hashElListHash(transcriptHash); /* Create file with all matching gene IDs. 
*/ struct tempName keyTn; trashDirFile(&keyTn, "hgg", "key", ".key"); FILE *f = mustOpen(keyTn.forCgi, "w"); for (el = list; el != NULL; el = el->next) fprintf(f, "%s\n", el->name); carefulClose(&f); /* Print out some info. */ hPrintf("Thresholding <i>%s</i> at %g. ", gg->shortLabel, threshold); hPrintf("There are %d regions covering %lld bases.<BR>\n", slCount(bedList), bedTotalSize((struct bed*)bedList) ); hPrintf("Installed a Gene Sorter filter that selects only genes in these regions.<BR>\n"); if (m) { hPrintf("There are %d markers in the regions over threshold that overlap knownGenes.<BR>\n", markerCount); hPrintf("Installed a Gene Sorter custom column called \"%s Markers\" with these markers.<BR>\n", gg->shortLabel); } /* close custom column output file */ if (m) { lineFileClose(&m); carefulClose(&g); } /* Stuff cart variable with name of file. */ char keyCartName[256]; safef(keyCartName, sizeof(keyCartName), "%s%s.keyFile", advFilterPrefix, idColumn); cartSetString(cart, keyCartName, keyTn.forCgi); cartSetString(cart, customFileVarName, snpTn.forCgi); char snpVisCartNameTemp[256]; char *snpVisCartName = NULL; safef(snpVisCartNameTemp, sizeof(snpVisCartNameTemp), "%s%s Markers.vis", colConfigPrefix, gg->shortLabel); snpVisCartName = replaceChars(snpVisCartNameTemp, " ", "_"); cartSetString(cart, snpVisCartName, "1"); freeMem(snpVisCartName); hPrintf("<FORM ACTION=\"../cgi-bin/hgNear\" METHOD=GET>\n"); cartSaveSession(cart); hPrintf("<CENTER>"); cgiMakeButton("submit", "go to gene sorter"); hPrintf("</CENTER>"); hPrintf("</FORM>"); cartWebEnd(); }
void regionPicker(char *database, char *axtBestDir, char *output) /* regionPicker - Code to pick regions to annotate deeply.. */ { struct sqlConnection *conn = NULL; struct slName *allChroms = NULL, *chrom = NULL; struct region *regionList = NULL, *region; FILE *f = mustOpen(output, "w"); struct stats *stats; struct scoredWindow *winList = NULL; struct hash *chromLimitHash = NULL; AllocVar(stats); chromLimitHash = getChromLimits(database); /* Figure out which regions to process from command line. * By default will do whole genome. */ if (sameWord(clRegion, "genome")) { allChroms = hAllChromNames(database); for (chrom = allChroms; chrom != NULL; chrom = chrom->next) { if (!endsWith(chrom->name, "_random")) { AllocVar(region); region->name = cloneString(chrom->name); region->chrom = cloneString(chrom->name); region->start = 0; region->end = hChromSize(database, chrom->name); slAddHead(®ionList, region); } } slReverse(®ionList); } else if (startsWith("chr", clRegion) && strchr(clRegion, ':') == NULL) { AllocVar(region); region->name = cloneString(clRegion); region->chrom = cloneString(clRegion); region->start = 0; region->end = hChromSize(database, clRegion); slAddHead(®ionList, region); } else { regionList = loadRegionFile(database, clRegion); } /* Gather statistics one region at a time and then * print them. */ conn = hAllocConn(database); for (region = regionList; region != NULL; region = region->next) { printf("Processing %s %s:%d-%d\n", region->name, region->chrom, region->start, region->end); statsOnSpan(database, conn, region, axtBestDir, stats, f, &winList); } fprintf(f, "\n"); reportStats(stats, f); fprintf(f, "\n"); uglyf("Got %d windows with no gaps\n", slCount(winList)); countChromWindows(database, winList, f); outputPicks(winList, database, chromLimitHash, stats, f); }
void cytoBandIdeoDraw(struct track *tg, int seqStart, int seqEnd,
        struct hvGfx *hvg, int xOff, int yOff, int width,
        MgFont *font, Color color, enum trackVisibility vis)
/* Draw the entire chromosome with a little red box around our current position.
 *   tg               - track whose items are the cytoBand list to draw.
 *   hvg              - graphics context drawn into.
 *   xOff, yOff       - upper left corner of the drawing area.
 *   width            - width of the drawing area in pixels.
 *   font, color      - passed through to genericDrawItems().
 *   seqStart, seqEnd, vis - not used here; the whole chromosome (0 to its
 *                      size) is always drawn, and always as tvDense. */
{
double scale = 0;
int xBorder = 4;
int yBorder = 0;
int chromSize = hChromSize(database, chromName);
struct cytoBand *cbList = NULL, *cb = NULL;

/* Pixels per base once the left and right borders are taken off. */
scale = (double) (width - (2 * xBorder)) / chromSize;

/* Shrink the item height by 11 pixels while drawing so there is room for
 * the outline and position box around the bands.  Put back at the end. */
tg->heightPer -= 11;
tg->lineHeight -= 11;

/* Time to draw the bands. */
hvGfxSetClip(hvg, xOff, yOff, width, tg->height);
genericDrawItems(tg, 0, chromSize, hvg, xOff+xBorder, yOff+5, width-(2*xBorder), font, color, tvDense);

yBorder = tg->heightPer + 7 + 1;

/* Draw an outline around chromosome for visualization purposes. Helps
   to make the chromosome look better. */
hvGfxLine(hvg, xOff+xBorder, yOff+4, xOff+width-xBorder, yOff+4, MG_BLACK);
hvGfxLine(hvg, xOff+xBorder, yOff+yBorder-3, xOff+width-xBorder, yOff+yBorder-3, MG_BLACK);
hvGfxLine(hvg, xOff+xBorder, yOff+4, xOff+xBorder, yOff+yBorder-3, MG_BLACK);
hvGfxLine(hvg, xOff+width-xBorder, yOff+4, xOff+width-xBorder, yOff+yBorder-3, MG_BLACK);

/* Find and draw the centromere which is defined as the
   two bands with gieStain "acen" */
cbList = tg->items;
for (cb = cbList; cb != NULL; cb = cb->next)
    {
    /* If centromere do some drawing. */
    if (sameString(cb->gieStain, "acen"))
        {
        int cenLeft, cenRight, cenTop, cenBottom;

        /* The right edge normally comes from the band that follows the
         * first "acen" band.  If this band is the last item there is no
         * following band, so use its own end rather than dereferencing
         * a NULL next pointer. */
        struct cytoBand *cenLast = (cb->next != NULL) ? cb->next : cb;

        /* Get the coordinates of the edges of the centromere. */
        cenLeft = round((cb->chromStart)*scale) + xOff + xBorder;
        cenRight = round((cenLast->chromEnd)*scale) + xOff + xBorder;
        cenTop = yOff+4;
        cenBottom = yOff + yBorder - 3;

        /* Draw centromere itself. */
        hCytoBandDrawCentromere(hvg, cenLeft, cenTop, cenRight - cenLeft,
                cenBottom-cenTop+1, MG_WHITE, hCytoBandCentromereColor(hvg));
        break;
        }
    }

/* Draw a red box around all positions in windows for this chromName.
 * Double thick so two pixels thick each. */
struct window *window;
for (window=windows; window; window=window->next)
    {
    if (!sameString(chromName, window->chromName))
        continue;

    int x1 = round((window->winStart)*scale) + xOff + xBorder -1;
    int x2 = round((window->winEnd)*scale) + xOff + xBorder -1;

    hvGfxBox(hvg, x1, yOff+1, x2-x1, 2, MG_RED);                /* top */
    hvGfxBox(hvg, x1, yOff + yBorder - 1, x2-x1, 2, MG_RED);    /* bottom */
    hvGfxBox(hvg, x1, yOff+1, 2, yBorder, MG_RED);              /* left */
    hvGfxBox(hvg, x2, yOff+1, 2, yBorder, MG_RED);              /* right */
    }

hvGfxUnclip(hvg);

/* Put back the lineHeight for the track
   now that we are done spoofing tgDrawItems(). */
tg->heightPer += 11;
tg->lineHeight += 11;
}