static void printDescription(char *id, struct sqlConnection *conn, struct trackDb *tdb) /* Print out description of gene given ID. */ { char *description = descriptionString(id, conn); int i, exonCnt = 0, cdsExonCnt = 0; int cdsStart, cdsEnd; hPrintf("%s", description); freez(&description); /* print genome position and size */ char buffer[1024]; char *commaPos; if (isGencode || isGencode2) { hPrintf("<B>Gencode Transcript:</B> %s<br>\n", isGencode2 ? curGeneId : curAlignId);; char buffer[1024]; hPrintf("<B>Gencode Gene:</B> %s<br>\n", getGencodeGeneId(conn, curGeneId, buffer, sizeof buffer)); } exonCnt = curGenePred->exonCount; safef(buffer, sizeof buffer, "%s:%d-%d", curGeneChrom, curGeneStart+1, curGeneEnd); commaPos = addCommasToPos(database, buffer); hPrintf("<B>Transcript (Including UTRs)</B><br>\n"); hPrintf("<B> Position:</B> %s %s ",database, commaPos); sprintLongWithCommas(buffer, (long long)curGeneEnd - curGeneStart); hPrintf("<B>Size:</B> %s ", buffer); hPrintf("<B>Total Exon Count:</B> %d ", exonCnt); hPrintf("<B>Strand:</B> %s<br>\n",curGenePred->strand); cdsStart = curGenePred->cdsStart; cdsEnd = curGenePred->cdsEnd; /* count CDS exons */ if (cdsStart < cdsEnd) { for (i=0; i<exonCnt; i++) { if ( (cdsStart <= curGenePred->exonEnds[i]) && (cdsEnd >= curGenePred->exonStarts[i]) ) cdsExonCnt++; } hPrintf("<B>Coding Region</B><br>\n"); safef(buffer, sizeof buffer, "%s:%d-%d", curGeneChrom, cdsStart+1, cdsEnd); commaPos = addCommasToPos(database, buffer); hPrintf("<B> Position:</B> %s %s ",database, commaPos); sprintLongWithCommas(buffer, (long long)cdsEnd - cdsStart); hPrintf("<B>Size:</B> %s ", buffer); hPrintf("<B>Coding Exon Count:</B> %d \n", cdsExonCnt); } fflush(stdout); }
void printLongWithCommas(FILE *f, long long l)
/* Write l to f as text, using sprintLongWithCommas to produce the
 * comma-separated digits.  Nothing else (no newline) is written. */
{
char formatted[32];     /* filled in by sprintLongWithCommas */

sprintLongWithCommas(formatted, (intmax_t)l);
fputs(formatted, f);
}
void bbiIntervalStatsReport(struct bbiInterval *bbList, char *table,
        char *chrom, bits32 start, bits32 end)
/* Write out little statistical report in HTML to stdout.
 *   bbList     - list of intervals to summarise; nothing is printed if NULL
 *   table,chrom - not used by this function
 *   start,end  - window the coverage percentage is measured against
 * Reports item count, bases covered, coverage of the window, average span,
 * min/max span (only when they differ), and the average, min, max and
 * standard deviation of the interval values. */
{
/* An empty list has no first value to seed min/max from and would make the
 * averages divide by zero, so there is nothing meaningful to report. */
if (bbList == NULL)
    return;

/* Loop through list and calculate some stats. */
bits64 iCount = 0;
bits64 iTotalSize = 0;
bits32 biggestSize = 0, smallestSize = BIGNUM;
struct bbiInterval *bb;
double sum = 0.0, sumSquares = 0.0;
double minVal = bbList->val, maxVal = bbList->val;
for (bb = bbList; bb != NULL; bb = bb->next)
    {
    iCount += 1;
    bits32 size = bb->end - bb->start;
    iTotalSize += size;
    if (biggestSize < size)
        biggestSize = size;
    if (smallestSize > size)
        smallestSize = size;
    double val = bb->val;
    sum += val;
    sumSquares += val * val;
    if (minVal > val)
        minVal = val;
    if (maxVal < val)
        maxVal = val;
    }

char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, iCount);
sprintLongWithCommas(num2Buf, iTotalSize);

/* Guard the percentage against a zero-width window (start == end). */
bits32 winSize = end-start;
double coverage = (winSize != 0) ? 100.0*iTotalSize/winSize : 0.0;
printf("<B>Statistics on:</B> %s <B>items covering</B> %s bases (%4.2f%% coverage)<BR>\n",
       num1Buf, num2Buf, coverage);
printf("<B>Average item spans</B> %4.2f <B>bases.</B> ", (double)iTotalSize/iCount);

/* Min/max span is only interesting when the items are not all one size. */
if (biggestSize != smallestSize)
    {
    sprintLongWithCommas(num1Buf, smallestSize);
    sprintLongWithCommas(num2Buf, biggestSize);
    printf("<B>Minimum span</B> %s <B>maximum span</B> %s", num1Buf, num2Buf);
    }
printf("<BR>\n");
printf("<B>Average value</B> %g <B>min</B> %g <B>max</B> %g <B> standard deviation </B> %g<BR>\n",
       sum/iCount, minVal, maxVal, calcStdFromSums(sum, sumSquares, iCount));
}
void hvGfxDrawRulerBumpText(struct hvGfx *hvg, int xOff, int yOff,
        int height, int width, Color color, MgFont *font,
        int startNum, int range, int bumpX, int bumpY)
/* Draw a ruler inside the indicated part of mg with numbers that start at
 * startNum and span range.  Bump text positions slightly.
 *   xOff,yOff,width,height - rectangle the ruler is drawn in
 *   color,font             - used for both tick marks and labels
 *   bumpX,bumpY            - offsets added to each label's position; bumpX
 *                            is also added to the label width estimate
 * A one-pixel-wide tick is drawn at every multiple of the chosen tick span
 * inside (startNum, startNum+range); a label is drawn to the left of a tick
 * only when it fits without crossing xOff. */
{
int tickSpan;
int tickPos;
double scale;
int firstTick;
int remainder;
int end = startNum + range;
int x;
char tbuf[18];
int numWid;
int goodNumTicks;
int niceNumTicks = width/35;

/* Estimate label width from the ruler's last number. */
sprintLongWithCommas(tbuf, end);
numWid = mgFontStringWidth(font, tbuf)+4+bumpX;

/* A negative bumpX can make numWid <= 0; treat that like "one label fits"
 * rather than dividing by zero. */
goodNumTicks = (numWid > 0) ? width/numWid : 1;

/* Apply the upper bound first and the lower bound last, so the result is
 * always at least 1.  In the opposite order a ruler narrower than 35 pixels
 * (niceNumTicks == 0) produced a tick count of 0.
 * NOTE(review): figureTickSpan presumably cannot handle a count of 0 -
 * confirm against its definition. */
if (goodNumTicks > niceNumTicks)
    goodNumTicks = niceNumTicks;
if (goodNumTicks < 1)
    goodNumTicks = 1;

tickSpan = figureTickSpan(range, goodNumTicks);
scale = (double)width / range;

/* First tick is the first multiple of tickSpan above startNum. */
firstTick = startNum + tickSpan;
remainder = firstTick % tickSpan;
firstTick -= remainder;

for (tickPos=firstTick; tickPos<end; tickPos += tickSpan)
    {
    sprintLongWithCommas(tbuf, tickPos);
    numWid = mgFontStringWidth(font, tbuf)+4;
    x = (int)((tickPos-startNum) * scale) + xOff;
    hvGfxBox(hvg, x, yOff, 1, height, color);
    if (x - numWid >= xOff)
        {
        hvGfxTextCentered(hvg, x-numWid + bumpX, yOff + bumpY, numWid,
                          height, color, font, tbuf);
        }
    }
}
static void bigWigClick(struct trackDb *tdb, char *fileName) /* Display details for BigWig data tracks. */ { char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = NULL; struct lm *lm = lmInit(0); struct bbiInterval *bbList = NULL; char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery"); unsigned maxWTQ = 0; if (isNotEmpty(maxWinToQuery)) maxWTQ = sqlUnsigned(maxWinToQuery); if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart)) { bbi = bigWigFileOpen(fileName); bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm); } char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */ sprintLongWithCommas(num1Buf, BASE_1(winStart)); sprintLongWithCommas(num2Buf, winEnd); printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf ); sprintLongWithCommas(num1Buf, winEnd-winStart); printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf); if (bbList != NULL) { bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd); } else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart)) { sprintLongWithCommas(num1Buf, maxWTQ); printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf); } else { printf("<P>No data overlapping current position.</P>"); } lmCleanup(&lm); bbiFileClose(&bbi); }
static void genePredPosPrint(struct column *col, struct genePos *gp, struct sqlConnection *conn) /* Print genome position with hyperlink to browser. */ { char *pos = col->cellVal(col, gp, conn); char *chrom; int start, end; hPrintf("<TD>"); if (pos == NULL) hPrintf("n/a"); else { char numBuf[32]; hgParseChromRange(database, pos, &chrom, &start, &end); sprintLongWithCommas(numBuf, (start+end+1)/2); hPrintf("<A HREF=\"%s?db=%s&position=%s&%s&%s=full\">", hgTracksName(), database, pos, cartSidUrlString(cart), genomeSetting("geneTable")); hPrintf("%s %s</A>", chrom, numBuf); freeMem(pos); } hPrintf("</TD>"); }
static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
        FILE *f, char *databaseName, int databaseSeqCount,
        double databaseLetterCount, char *ourId, double minIdentity)
/* Do ncbiblast-like output at end of processing query.
 *   abList       - alignments for one query; abList->axtList must be non-empty
 *   queryIx      - not used by this function
 *   isProt       - selects protein-style or DNA-style per-alignment details
 *   f            - output stream
 *   databaseName, databaseSeqCount, databaseLetterCount - echoed in header
 *   ourId        - program identifier; "axtBlastOut" is used when NULL
 *   minIdentity  - alignments whose rounded percent identity is below this
 *                  are left out of both the summary and the details
 * For DNA, non-translated alignments that are query '-' / target '+' are
 * reverse-complemented in place (the axt records are modified).
 * NOTE(review): round() results are printed with %d, so this relies on the
 * project's round being integer-valued rather than math.h's - confirm. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, " (%d letters)\n", abList->qSize);
fprintf(f, "\n");
fprintf(f, "Database: %s \n", databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
fprintf(f, "\n");
fprintf(f, "Searching.done\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, " Score E\n");
fprintf(f, "Sequences producing significant alignments: (bits) Value\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    for (ref = target->axtList; ref != NULL; ref = ref->next)
        {
        struct axt *axt = ref->axt;
        int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
        double identity = round(100.0 * matches / axt->symCount);
        double expectation;
        int bit;

        /* skip output if minIdentity not reached */
        if (identity < minIdentity)
            continue;
        bit = blastzScoreToNcbiBits(axt->score);
        expectation = blastzScoreToNcbiExpectation(axt->score);
        fprintf(f, "%-67s %4d ", target->name, bit);
        ncbiPrintE(f, expectation);
        fprintf(f, "\n");
        }
    }
fprintf(f, "\n");

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    /* Tracks whether this target's name/length header has been printed.
     * A flag replaces the strdup'd name copies, which were never freed. */
    boolean saidName = FALSE;

    for (ref = target->axtList; ref != NULL; ref = ref->next)
        {
        struct axt *axt = ref->axt;
        int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
        double identity = round(100.0 * matches / axt->symCount);

        /* skip output if minIdentity not reached */
        if (identity < minIdentity)
            continue;

        /* print target sequence name and length only once, and only if at
         * least one of its alignments is actually shown */
        if (!saidName)
            {
            fprintf(f, "\n\n>%s \n", target->name);
            fprintf(f, " Length = %d\n", target->size);
            saidName = TRUE;
            }

        fprintf(f, "\n");
        fprintf(f, " Score = %d bits (%d), Expect = ",
                blastzScoreToNcbiBits(axt->score),
                blastzScoreToNcbiScore(axt->score));
        ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
        fprintf(f, "\n");

        if (isProt)
            {
            int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
            int gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
            fprintf(f, " Identities = %d/%d (%d%%),",
                    matches, axt->symCount, round(100.0 * matches / axt->symCount));
            fprintf(f, " Positives = %d/%d (%d%%),",
                    positives, axt->symCount, round(100.0 * positives / axt->symCount));
            fprintf(f, " Gaps = %d/%d (%d%%)\n",
                    gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
            if (axt->frame != 0)
                fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
            /* set the special global variable, answer_for_kg.
               This is needed for Known Genes track building.  Fan 1/21/03 */
            answer_for_kg=axt->symCount - matches;
            }
        else
            {
            fprintf(f, " Identities = %d/%d (%d%%)\n",
                    matches, axt->symCount, round(100.0 * matches / axt->symCount));
            /* blast displays dna searches as +- instead of blat's default -+ */
            if (!isTranslated && (axt->qStrand == '-') && (axt->tStrand == '+'))
                {
                reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
                reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
                reverseComplement(axt->qSym, axt->symCount);
                reverseComplement(axt->tSym, axt->symCount);
                axt->qStrand = '+';
                axt->tStrand = '-';
                }
            fprintf(f, " Strand = %s / %s\n",
                    nameForStrand(axt->qStrand), nameForStrand(axt->tStrand));
            }
        fprintf(f, "\n");
        blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
        }
    }

fprintf(f, " Database: %s\n", databaseName);

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
        FILE *f, char *databaseName, int databaseSeqCount,
        double databaseLetterCount, char *ourId)
/* Do wublast-like output at end of processing query.
 *   abList       - alignments for one query; abList->axtList must be non-empty
 *   queryIx      - record number echoed in the query header
 *   isProt       - protein output (frame line) vs DNA output (strand lines)
 *   f            - output stream
 *   databaseName, databaseSeqCount, databaseLetterCount - echoed in header;
 *                  the letter count also feeds the expectation calculation
 *   ourId        - program identifier; "axtBlastOut" is used when NULL
 * Output is a banner, a per-target summary table, then for each target its
 * alignments grouped by query strand ('+' first, then '-').
 * NOTE(review): round() results are printed with %d, so this relies on the
 * project's round being integer-valued rather than math.h's - confirm. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int isRc;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
fprintf(f, "All Rights Reserved\n");
fprintf(f, "\n");
fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");
if (!isProt)
    {
    fprintf(f, "Notice: this program and its default parameter settings are optimized to find\n");
    fprintf(f, "nearly identical sequences very rapidly. For slower but more sensitive\n");
    fprintf(f, "alignments please use other methods.\n");
    fprintf(f, "\n");
    }

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, " (%d letters; record %d)\n", abList->qSize, queryIx);
fprintf(f, "\n");
fprintf(f, "Database: %s\n", databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
fprintf(f, "\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, " Smallest\n");
fprintf(f, " Sum\n");
fprintf(f, " High Probability\n");
fprintf(f, "Sequences producing High-scoring Segment Pairs: Score P(N) N\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
    double p = expectationToProbability(expectation);
    fprintf(f, "%-61s %4d %8.1e %2d\n", target->name,
            blastzToWublastScore(target->score), p, slCount(target->axtList));
    }

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    fprintf(f, "\n\n>%s\n", target->name);
    fprintf(f, " Length = %d\n", target->size);
    fprintf(f, "\n");
    for (isRc=0; isRc <= 1; ++isRc)
        {
        boolean saidStrand = FALSE;
        char strand = (isRc ? '-' : '+');
        char *strandName = nameForStrand(strand);
        struct axtRef *ref;
        for (ref = target->axtList; ref != NULL; ref = ref->next)
            {
            struct axt *axt = ref->axt;
            int matches, positives;
            double expectation;

            if (axt->qStrand != strand)
                continue;

            matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
            positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
            if (!saidStrand)
                {
                saidStrand = TRUE;
                if (!isProt)
                    fprintf(f, " %s Strand HSPs:\n\n", strandName);
                }

            /* "P" is the probability derived from the expectation, the same
             * conversion the summary table above uses; it used to repeat
             * the raw expectation. */
            expectation = blastzScoreToWuExpectation(axt->score, databaseLetterCount);
            fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
                    blastzToWublastScore(axt->score),
                    blastzScoreToWuBits(axt->score, isProt),
                    expectation,
                    expectationToProbability(expectation));
            fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
                    matches, axt->symCount, round(100.0 * matches / axt->symCount),
                    positives, axt->symCount, round(100.0 * positives / axt->symCount));
            if (isProt)
                {
                if (axt->frame != 0)
                    fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
                fprintf(f, "\n");
                }
            else
                fprintf(f, ", Strand = %s / Plus\n", strandName);
            fprintf(f, "\n");
            blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
            }
        }
    }

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
int main(int argc, char *argv[]) /* Process command line. */ { int i; char *cp; unsigned long long reversed; size_t maxAlloc; char asciiAlloc[32]; optionInit(&argc, argv, options); if (argc < 2) usage(); maxAlloc = 2100000000 * (((sizeof(size_t)/4)*(sizeof(size_t)/4)*(sizeof(size_t)/4))); sprintLongWithCommas(asciiAlloc, (long long) maxAlloc); verbose(4, "#\tmaxAlloc: %s\n", asciiAlloc); setMaxAlloc(maxAlloc); /* produces: size_t is 4 == 2100000000 ~= 2^31 = 2Gb * size_t is 8 = 16800000000 ~= 2^34 = 16 Gb */ dnaUtilOpen(); motif = optionVal("motif", NULL); chr = optionVal("chr", NULL); strand = optionVal("strand", NULL); bedOutput = optionExists("bedOutput"); wigOutput = optionExists("wigOutput"); if (wigOutput) bedOutput = FALSE; else bedOutput = TRUE; if (chr) verbose(2, "#\tprocessing chr: %s\n", chr); if (strand) verbose(2, "#\tprocessing strand: '%s'\n", strand); if (motif) verbose(2, "#\tsearching for motif: %s\n", motif); else { warn("ERROR: -motif string empty, please specify a motif\n"); usage(); } verbose(2, "#\ttype output: %s\n", wigOutput ? "wiggle data" : "bed format"); verbose(2, "#\tspecified sequence: %s\n", argv[1]); verbose(2, "#\tsizeof(motifVal): %d\n", (int)sizeof(motifVal)); if (strand) { if (! 
(sameString(strand,"+") | sameString(strand,"-"))) { warn("ERROR: -strand specified ('%s') is not + or - ?\n", strand); usage(); } /* They are both on by default, turn off the one not specified */ if (sameString(strand,"-")) doPlusStrand = FALSE; if (sameString(strand,"+")) doMinusStrand = FALSE; } motifLen = strlen(motif); /* at two bits per character, size limit of motif is * number of bits in motifVal / 2 */ if (motifLen > (4*sizeof(motifVal))/2 ) { warn("ERROR: motif string too long, limit %d\n", (4*(int)sizeof(motifVal))/2 ); usage(); } cp = motif; motifVal = 0; complementVal = 0; for (i = 0; i < motifLen; ++i) { switch (*cp) { case 'a': case 'A': motifVal = (motifVal << 2) | A_BASE_VAL; complementVal = (complementVal << 2) | T_BASE_VAL; break; case 'c': case 'C': motifVal = (motifVal << 2) | C_BASE_VAL; complementVal = (complementVal << 2) | G_BASE_VAL; break; case 'g': case 'G': motifVal = (motifVal << 2) | G_BASE_VAL; complementVal = (complementVal << 2) | C_BASE_VAL; break; case 't': case 'T': motifVal = (motifVal << 2) | T_BASE_VAL; complementVal = (complementVal << 2) | A_BASE_VAL; break; default: warn( "ERROR: character in motif: '%c' is not one of ACGT\n", *cp); usage(); } ++cp; } reversed = 0; for (i = 0; i < motifLen; ++i) { int base; base = complementVal & 3; reversed = (reversed << 2) | base; complementVal >>= 2; } complementVal = reversed; verbose(2, "#\tmotif numerical value: %llu (%#llx)\n", motifVal, motifVal); verbose(2, "#\tcomplement numerical value: %llu (%#llx)\n", complementVal, complementVal); if (motifLen < 5) { warn("ERROR: motif string must be more than 4 characters\n"); usage(); } findMotif(argv[1]); return 0; }