Example #1
0
static void printDescription(char *id, struct sqlConnection *conn, struct trackDb *tdb)
/* Print out description of gene given ID. */
{
char *description = descriptionString(id, conn);
int  i, exonCnt = 0, cdsExonCnt = 0;
int  cdsStart, cdsEnd;

hPrintf("%s", description);
freez(&description);

/* print genome position and size */
char buffer[1024];
char *commaPos;
   
if (isGencode || isGencode2)
    {
    hPrintf("<B>Gencode Transcript:</B> %s<br>\n", isGencode2 ? curGeneId : curAlignId);;
    char buffer[1024];
    hPrintf("<B>Gencode Gene:</B> %s<br>\n", getGencodeGeneId(conn, curGeneId, buffer, sizeof buffer));
    }
exonCnt = curGenePred->exonCount;
safef(buffer, sizeof buffer, "%s:%d-%d", curGeneChrom, curGeneStart+1, curGeneEnd);
commaPos = addCommasToPos(database, buffer);

hPrintf("<B>Transcript (Including UTRs)</B><br>\n");
hPrintf("<B>&nbsp;&nbsp;&nbsp;Position:</B>&nbsp;%s %s&nbsp;",database, commaPos);
sprintLongWithCommas(buffer, (long long)curGeneEnd - curGeneStart);
hPrintf("<B>Size:</B>&nbsp;%s&nbsp;", buffer);
hPrintf("<B>Total Exon Count:</B>&nbsp;%d&nbsp;", exonCnt);
hPrintf("<B>Strand:</B>&nbsp;%s<br>\n",curGenePred->strand);

cdsStart = curGenePred->cdsStart;
cdsEnd = curGenePred->cdsEnd;

/* count CDS exons */
if (cdsStart < cdsEnd)
    {
    for (i=0; i<exonCnt; i++)
	{
	if ( (cdsStart <= curGenePred->exonEnds[i]) &&
	     (cdsEnd >= curGenePred->exonStarts[i]) )
	     cdsExonCnt++;
	}
    hPrintf("<B>Coding Region</B><br>\n");
    safef(buffer, sizeof buffer, "%s:%d-%d", curGeneChrom, cdsStart+1, cdsEnd);
    commaPos = addCommasToPos(database, buffer);
    hPrintf("<B>&nbsp;&nbsp;&nbsp;Position:</B>&nbsp;%s %s&nbsp;",database, commaPos);
    sprintLongWithCommas(buffer, (long long)cdsEnd - cdsStart);
    hPrintf("<B>Size:</B>&nbsp;%s&nbsp;", buffer);
    hPrintf("<B>Coding Exon Count:</B>&nbsp;%d&nbsp;\n", cdsExonCnt);
    }
fflush(stdout);
}
Example #2
0
void printLongWithCommas(FILE *f, long long l)
/* Print out a long number with commas at thousands, millions, etc. */
{
char ascii[32];
sprintLongWithCommas(ascii, (intmax_t)l);
fprintf(f, "%s", ascii);
}
Example #3
0
void bbiIntervalStatsReport(struct bbiInterval *bbList, char *table, 
	char *chrom, bits32 start, bits32 end)
/* Write out little statistical report in HTML */
{
/* Loop through list and calculate some stats. */
bits64 iCount = 0;
bits64 iTotalSize = 0;
bits32 biggestSize = 0, smallestSize = BIGNUM;
struct bbiInterval *bb;
double sum = 0.0, sumSquares = 0.0;
double minVal = bbList->val, maxVal = bbList->val;
for (bb = bbList; bb != NULL; bb = bb->next)
    {
    iCount += 1;
    bits32 size = bb->end - bb->start;
    iTotalSize += size;
    if (biggestSize < size)
        biggestSize = size;
    if (smallestSize > size)
        smallestSize = size;
    double val = bb->val;
    sum += val;
    sumSquares += val * val;
    if (minVal > val)
        minVal = val;
    if (maxVal < val)
        maxVal = val;
    }

char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, iCount);
sprintLongWithCommas(num2Buf, iTotalSize);
bits32 winSize = end-start;
printf("<B>Statistics on:</B> %s <B>items covering</B> %s bases (%4.2f%% coverage)<BR>\n",
	num1Buf, num2Buf, 100.0*iTotalSize/winSize);
printf("<B>Average item spans</B> %4.2f <B>bases.</B> ", (double)iTotalSize/iCount);
if (biggestSize != smallestSize)
    {
    sprintLongWithCommas(num1Buf, smallestSize);
    sprintLongWithCommas(num2Buf, biggestSize);
    printf("<B>Minimum span</B> %s <B>maximum span</B> %s", num1Buf, num2Buf);
    }
printf("<BR>\n");

printf("<B>Average value</B> %g <B>min</B> %g <B>max</B> %g <B> standard deviation </B> %g<BR>\n",
	sum/iCount, minVal, maxVal, calcStdFromSums(sum, sumSquares, iCount));
}
Example #4
0
void hvGfxDrawRulerBumpText(struct hvGfx *hvg, int xOff, int yOff, 
	int height, int width,
        Color color, MgFont *font,
        int startNum, int range, int bumpX, int bumpY)
/* Draw a ruler inside the indicated part of mg with numbers that start at
 * startNum and span range.  Bump text positions slightly. */
{
int tickSpan;
int tickPos;
double scale;
int firstTick;
int remainder;
int end = startNum + range;
int x;
char tbuf[18];
int numWid;
int goodNumTicks;
int niceNumTicks = width/35;

sprintLongWithCommas(tbuf, startNum+range);
numWid = mgFontStringWidth(font, tbuf)+4+bumpX;
goodNumTicks = width/numWid;
if (goodNumTicks < 1) goodNumTicks = 1;
if (goodNumTicks > niceNumTicks) goodNumTicks = niceNumTicks;

tickSpan = figureTickSpan(range, goodNumTicks);

scale = (double)width / range;

firstTick = startNum + tickSpan;
remainder = firstTick % tickSpan;
firstTick -= remainder;
for (tickPos=firstTick; tickPos<end; tickPos += tickSpan)
    {
    sprintLongWithCommas(tbuf, tickPos);
    numWid = mgFontStringWidth(font, tbuf)+4;
    x = (int)((tickPos-startNum) * scale) + xOff;
    hvGfxBox(hvg, x, yOff, 1, height, color);
    if (x - numWid >= xOff)
        {
        hvGfxTextCentered(hvg, x-numWid + bumpX, yOff + bumpY, numWid, 
                          height, color, font, tbuf);
        }
    }
}
Example #5
0
static void bigWigClick(struct trackDb *tdb, char *fileName)
/* Display details for BigWig data tracks. */
{
char *chrom = cartString(cart, "c");

/* Open BigWig file and get interval list. */
struct bbiFile *bbi = NULL;
struct lm *lm = lmInit(0);
struct bbiInterval *bbList = NULL;
char *maxWinToQuery = trackDbSettingClosestToHome(tdb, "maxWindowToQuery");

unsigned maxWTQ = 0;
if (isNotEmpty(maxWinToQuery))
    maxWTQ = sqlUnsigned(maxWinToQuery);

if ((maxWinToQuery == NULL) || (maxWTQ > winEnd-winStart))
    {
    bbi = bigWigFileOpen(fileName);
    bbList = bigWigIntervalQuery(bbi, chrom, winStart, winEnd, lm);
    }

char num1Buf[64], num2Buf[64]; /* big enough for 2^64 (and then some) */
sprintLongWithCommas(num1Buf, BASE_1(winStart));
sprintLongWithCommas(num2Buf, winEnd);
printf("<B>Position: </B> %s:%s-%s<BR>\n", chrom, num1Buf, num2Buf );
sprintLongWithCommas(num1Buf, winEnd-winStart);
printf("<B>Total Bases in view: </B> %s <BR>\n", num1Buf);

if (bbList != NULL)
    {
    bbiIntervalStatsReport(bbList, tdb->table, chrom, winStart, winEnd);
    }
else if ((bbi == NULL) && (maxWTQ <= winEnd-winStart))
    {
    sprintLongWithCommas(num1Buf, maxWTQ);
    printf("<P>Zoom in to a view less than %s bases to see data summary.</P>",num1Buf);
    }
else
    {
    printf("<P>No data overlapping current position.</P>");
    }

lmCleanup(&lm);
bbiFileClose(&bbi);
}
Example #6
0
static void genePredPosPrint(struct column *col, struct genePos *gp, 
	struct sqlConnection *conn)
/* Print genome position with hyperlink to browser. */
{
char *pos = col->cellVal(col, gp, conn);
char *chrom;
int start, end;

hPrintf("<TD>");
if (pos == NULL)
    hPrintf("n/a");
else
    {
    char numBuf[32];
    hgParseChromRange(database, pos, &chrom, &start, &end);
    sprintLongWithCommas(numBuf, (start+end+1)/2);
    hPrintf("<A HREF=\"%s?db=%s&position=%s&%s&%s=full\">",
	    hgTracksName(), database, pos, cartSidUrlString(cart), genomeSetting("geneTable"));
    hPrintf("%s&nbsp;%s</A>", chrom, numBuf);
    freeMem(pos);
    }
hPrintf("</TD>");
}
Example #7
0
static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, double minIdentity)
/* Do ncbiblast-like output at end of processing query. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, "         (%d letters)\n", abList->qSize);
fprintf(f, "\n");
fprintf(f, "Database: %s \n",  databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
fprintf(f, "\n");
fprintf(f, "Searching.done\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, "                                                                 Score    E\n");
fprintf(f, "Sequences producing significant alignments:                      (bits) Value\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches;
    double identity, expectation;
    int bit;
    
    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;
	
	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	/* skip output if minIdentity not reached */
	if (identity < minIdentity) continue;
    
    	bit = blastzScoreToNcbiBits(axt->score);
        expectation = blastzScoreToNcbiExpectation(axt->score);
    	fprintf(f, "%-67s  %4d   ", target->name, bit);
    	ncbiPrintE(f, expectation);
    	fprintf(f, "\n");
    	}
    }
fprintf(f, "\n");

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches, gaps;
    char *oldName;
    
    int ii = 0;
    double identity;
    oldName = strdup("");

    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	ii++;
	axt = ref->axt;
	
	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	
	/* skip output if minIdentity not reached */
	if (identity < minIdentity) continue;
        
	/* print target sequence name and length only once */ 
	if (!sameWord(oldName, target->name))
	    {
	    fprintf(f, "\n\n>%s \n", target->name);
	    fprintf(f, "          Length = %d\n", target->size);
	    oldName = strdup(target->name);
	    }

	fprintf(f, "\n");
	fprintf(f, " Score = %d bits (%d), Expect = ",
	     blastzScoreToNcbiBits(axt->score),
	     blastzScoreToNcbiScore(axt->score));
	ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
	fprintf(f, "\n");
	
	if (isProt)
	    {
	    int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
	    gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
	    fprintf(f, " Identities = %d/%d (%d%%),",
		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
	    fprintf(f, " Positives = %d/%d (%d%%),",
		 positives, axt->symCount, round(100.0 * positives / axt->symCount));
	    fprintf(f, " Gaps = %d/%d (%d%%)\n",
		 gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
	    if (axt->frame != 0) 
		fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
	    /* set the special global variable, answer_for_kg.  
   	       This is needed for Known Genes track building.  Fan 1/21/03 */
            answer_for_kg=axt->symCount - matches;
	    }
	else
	    {
	    fprintf(f, " Identities = %d/%d (%d%%)\n",
		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
	    /* blast displays dna searches as +- instead of blat's default -+ */
	    if (!isTranslated)
		if ((axt->qStrand == '-') && (axt->tStrand == '+'))
		    {
		    reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
		    reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
		    reverseComplement(axt->qSym, axt->symCount);
		    reverseComplement(axt->tSym, axt->symCount);
		    axt->qStrand = '+';
		    axt->tStrand = '-';
		    }
	    fprintf(f, " Strand = %s / %s\n", nameForStrand(axt->qStrand),
		nameForStrand(axt->tStrand));
	    }
	fprintf(f, "\n");
	blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
	}
    }

fprintf(f, "  Database: %s\n", databaseName);

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
Example #8
0
static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, 
	char *databaseName, int databaseSeqCount, double databaseLetterCount, 
	char *ourId)
/* Do wublast-like output at end of processing query. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int isRc;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
fprintf(f, "All Rights Reserved\n");
fprintf(f, "\n");
fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");
if (!isProt)
    {
    fprintf(f, "Notice:  this program and its default parameter settings are optimized to find\n");
    fprintf(f, "nearly identical sequences very rapidly.  For slower but more sensitive\n");
    fprintf(f, "alignments please use other methods.\n");
    fprintf(f, "\n");
    }

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query=  %s\n", queryName);
fprintf(f, "        (%d letters; record %d)\n", abList->qSize, queryIx);
fprintf(f, "\n");
fprintf(f, "Database:  %s\n",  databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
fprintf(f, "\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, "                                                                     Smallest\n");
fprintf(f, "                                                                       Sum\n");
fprintf(f, "                                                              High  Probability\n");
fprintf(f, "Sequences producing High-scoring Segment Pairs:              Score  P(N)      N\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
    double p = expectationToProbability(expectation);
    fprintf(f, "%-61s %4d  %8.1e %2d\n", target->name, 
    	blastzToWublastScore(target->score), p, slCount(target->axtList));
    }

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    fprintf(f, "\n\n>%s\n", target->name);
    fprintf(f, "        Length = %d\n", target->size);
    fprintf(f, "\n");
    for (isRc=0; isRc <= 1; ++isRc)
	{
	boolean saidStrand = FALSE;
	char strand = (isRc ? '-' : '+');
	char *strandName = nameForStrand(strand);
	struct axtRef *ref;
	struct axt *axt;
	for (ref = target->axtList; ref != NULL; ref = ref->next)
	    {
	    axt = ref->axt;
	    if (axt->qStrand == strand)
		{
		int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
		int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
		if (!saidStrand)
		    {
		    saidStrand = TRUE;
		    if (!isProt)
			fprintf(f, "  %s Strand HSPs:\n\n", strandName);
		    }
		fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
		     blastzToWublastScore(axt->score), 
		     blastzScoreToWuBits(axt->score, isProt),
		     blastzScoreToWuExpectation(axt->score, databaseLetterCount),
		     blastzScoreToWuExpectation(axt->score, databaseLetterCount));
		fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
		     matches, axt->symCount, round(100.0 * matches / axt->symCount),
		     positives, axt->symCount, round(100.0 * positives / axt->symCount));
		if (isProt)
		    {
		    if (axt->frame != 0)
		        fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
		    fprintf(f, "\n");
		    }
		else
		    fprintf(f, ", Strand = %s / Plus\n", strandName);
		fprintf(f, "\n");
		blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
		}
	    }
	}
    }

/* Cleanup time. */
targetHitsFreeList(&targetList);
}
int main(int argc, char *argv[])
/* Process command line. */
{
int i;
char *cp;
unsigned long long reversed;
size_t maxAlloc;
char asciiAlloc[32];

optionInit(&argc, argv, options);

if (argc < 2)
    usage();

maxAlloc = 2100000000 *
	 (((sizeof(size_t)/4)*(sizeof(size_t)/4)*(sizeof(size_t)/4)));
sprintLongWithCommas(asciiAlloc, (long long) maxAlloc);
verbose(4, "#\tmaxAlloc: %s\n", asciiAlloc);
setMaxAlloc(maxAlloc);
/* produces: size_t is 4 == 2100000000 ~= 2^31 = 2Gb
 *      size_t is 8 = 16800000000 ~= 2^34 = 16 Gb
 */

dnaUtilOpen();

motif = optionVal("motif", NULL);
chr = optionVal("chr", NULL);
strand = optionVal("strand", NULL);
bedOutput = optionExists("bedOutput");
wigOutput = optionExists("wigOutput");

if (wigOutput)
    bedOutput = FALSE;
else
    bedOutput = TRUE;

if (chr)
    verbose(2, "#\tprocessing chr: %s\n", chr);
if (strand)
    verbose(2, "#\tprocessing strand: '%s'\n", strand);
if (motif)
    verbose(2, "#\tsearching for motif: %s\n", motif);
else {
    warn("ERROR: -motif string empty, please specify a motif\n");
    usage();
}
verbose(2, "#\ttype output: %s\n", wigOutput ? "wiggle data" : "bed format");
verbose(2, "#\tspecified sequence: %s\n", argv[1]);
verbose(2, "#\tsizeof(motifVal): %d\n", (int)sizeof(motifVal));
if (strand)
    {
    if (! (sameString(strand,"+") | sameString(strand,"-")))
	{
	warn("ERROR: -strand specified ('%s') is not + or - ?\n", strand);
	usage();
	}
    /*	They are both on by default, turn off the one not specified */
    if (sameString(strand,"-"))
	doPlusStrand = FALSE;
    if (sameString(strand,"+"))
	doMinusStrand = FALSE;
    }
motifLen = strlen(motif);
/*	at two bits per character, size limit of motif is
 *	number of bits in motifVal / 2
 */
if (motifLen > (4*sizeof(motifVal))/2 )
    {
    warn("ERROR: motif string too long, limit %d\n", (4*(int)sizeof(motifVal))/2 );
    usage();
    }
cp = motif;
motifVal = 0;
complementVal = 0;
for (i = 0; i < motifLen; ++i)
    {
	switch (*cp)
	{
	case 'a':
	case 'A':
	    motifVal = (motifVal << 2) | A_BASE_VAL;
	    complementVal = (complementVal << 2) | T_BASE_VAL;
	    break;
	case 'c':
	case 'C':
	    motifVal = (motifVal << 2) | C_BASE_VAL;
	    complementVal = (complementVal << 2) | G_BASE_VAL;
	    break;
	case 'g':
	case 'G':
	    motifVal = (motifVal << 2) | G_BASE_VAL;
	    complementVal = (complementVal << 2) | C_BASE_VAL;
	    break;
	case 't':
	case 'T':
	    motifVal = (motifVal << 2) | T_BASE_VAL;
	    complementVal = (complementVal << 2) | A_BASE_VAL;
	    break;
	default:
	    warn(
		"ERROR: character in motif: '%c' is not one of ACGT\n", *cp);
	    usage();
	}
	++cp;
    }
reversed = 0;
for (i = 0; i < motifLen; ++i)
    {
    int base;
    base = complementVal & 3;
    reversed = (reversed << 2) | base;
    complementVal >>= 2;
    }
complementVal = reversed;
verbose(2, "#\tmotif numerical value: %llu (%#llx)\n", motifVal, motifVal);
verbose(2, "#\tcomplement numerical value: %llu (%#llx)\n", complementVal, complementVal);
if (motifLen < 5)
    {
    warn("ERROR: motif string must be more than 4 characters\n");
    usage();
    }

findMotif(argv[1]);
return 0;
}