Пример #1
0
void doTriangle(struct trackDb *tdb, char *item, char *motifTable)
/* Display detailed info on a regulatory triangle item. */
{
int start = cartInt(cart, "o");
struct dnaSeq *seq = NULL;
struct dnaMotif *motif = loadDnaMotif(item, motifTable);
char *table = tdb->table;
int rowOffset = hOffsetPastBin(database, seqName, table);
char query[256];
struct sqlResult *sr;
char **row;
struct bed *hit = NULL;
struct sqlConnection *conn = hAllocConn(database);

cartWebStart(cart, database, "Regulatory Motif Info");
genericBedClick(conn, tdb, item, start, 6);

sqlSafef(query, sizeof query,
	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
	table, item, seqName, start);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
    hit = bedLoadN(row + rowOffset, 6);
sqlFreeResult(&sr);

if (hit != NULL)
    {
    seq = hDnaFromSeq(database, hit->chrom, hit->chromStart, hit->chromEnd, dnaLower);
    if (hit->strand[0] == '-')
	reverseComplement(seq->dna, seq->size);
    }
motifHitSection(seq, motif);
printTrackHtml(tdb);
}
Пример #2
0
void doTransRegCode(struct trackDb *tdb, char *item, char *motifTable)
/* Display detailed info on a transcriptional regulatory code item. */
{
struct dnaMotif *motif = loadDnaMotif(item, motifTable);
int start = cartInt(cart, "o");
struct dnaSeq *seq = NULL;
char *table = tdb->table;
int rowOffset = hOffsetPastBin(database, seqName, table);
char query[256];
struct sqlResult *sr;
char **row;
struct sqlConnection *conn = hAllocConn(database);
struct transRegCode *trc = NULL;

cartWebStart(cart, database, "Regulatory Code Info");
sqlSafef(query, sizeof query,
	"select * from %s where  name = '%s' and chrom = '%s' and chromStart = %d",
	table, item, seqName, start);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
    trc = transRegCodeLoad(row+rowOffset);
sqlFreeResult(&sr);

if (trc != NULL)
    {
    char strand[2];
    seq = hDnaFromSeq(database, trc->chrom, trc->chromStart, trc->chromEnd, dnaLower);
    if (seq->size != motif->columnCount)
	{
        printf("WARNING: seq->size = %d, motif->colCount=%d<BR>\n",
		seq->size, motif->columnCount);
	strand[0] = '?';
	seq = NULL;
	}
    else
	{
	strand[0] = dnaMotifBestStrand(motif, seq->dna);
	if (strand[0] == '-')
	    reverseComplement(seq->dna, seq->size);
	}
    strand[1] = 0;
    printf("<B>Name:</B> ");
    sacCerHgGeneLinkName(conn, trc->name);
    printf("<BR>\n");
    printf("<B>ChIP-chip Evidence:</B> %s<BR>\n", trc->chipEvidence);
    printf("<B>Species conserved in:</B> %d of 2<BR>\n", trc->consSpecies);
    if (seq != NULL)
	printf("<B>Bit Score of Motif Hit:</B> %4.2f<BR>\n",
	    dnaMotifBitScore(motif, seq->dna));
    printf("<B>Item score:</B> %d<BR>\n", trc->score);
    printPosOnChrom(trc->chrom, trc->chromStart, trc->chromEnd, strand, TRUE, trc->name);
    }
motifHitSection(seq, motif);
printTrackHtml(tdb);
}
Пример #3
0
struct dnaSeq *dnaFromChrom(char *db, char *chrom, int chromStart, int chromEnd, enum dnaCase seqCase)
/** Return the dna for the chromosome region specified. */
{
struct dnaSeq *seq = NULL;
if(chromNib != NULL)
    {
    seq = (struct dnaSeq *)nibLdPart(chromNib, chromNibFile, chromNibSize, 
				     chromStart, chromEnd - chromStart);
    }
else
    seq = hDnaFromSeq(db, chrom, chromStart, chromEnd, seqCase);
return seq;
}
Пример #4
0
static double motifScoreHere(char *chrom, int start, int end,
	char *motifName, char *motifTable)
/* Return score of motif at given position. */
{
double score;
struct dnaSeq *seq = hDnaFromSeq(database, chrom, start, end, dnaLower);
struct dnaMotif *motif = loadDnaMotif(motifName, motifTable);
char strand = dnaMotifBestStrand(motif, seq->dna);
if (strand == '-')
    reverseComplement(seq->dna, seq->size);
score = dnaMotifBitScore(motif, seq->dna);
dnaMotifFree(&motif);
dnaSeqFree(&seq);
return score;
}
Пример #5
0
void chromFeatureSeq(struct sqlConnection *conn, 
	char *database, char *chrom, char *trackSpec,
	FILE *bedFile, FILE *faFile,
	int *retItemCount, int *retBaseCount)
/* Write out sequence file for features from one chromosome.
 * This separate routine handles the non-merged case.  It's
 * reason for being is so that the feature names get preserved. */
{
boolean hasBin;
char t[512], *s = NULL;
char table[HDB_MAX_TABLE_STRING];
struct featureBits *fbList = NULL, *fb;

if (trackSpec[0] == '!')
   errAbort("Sorry, '!' not available with fa output unless you use faMerge");
isolateTrackPartOfSpec(trackSpec, t);
s = strchr(t, '.');
if (s != NULL)
    errAbort("Sorry, only database (not file) tracks allowed with "
             "fa output unless you use faMerge");
// ignore isSplit return from hFindSplitTable()
(void) hFindSplitTable(database, chrom, t, table, &hasBin);
fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom),
			 where, TRUE, TRUE);
for (fb = fbList; fb != NULL; fb = fb->next)
    {
    int s = fb->start, e = fb->end;
    if (bedFile != NULL)
	{
	fprintf(bedFile, "%s\t%d\t%d\t%s", 
	    fb->chrom, fb->start, fb->end, fb->name);
	if (fb->strand != '?')
	    fprintf(bedFile, "\t0\t%c", fb->strand);
	fprintf(bedFile, "\n");
	}
    if (faFile != NULL)
        {
	struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower);
	if (fb->strand == '-')
	    reverseComplement(seq->dna, seq->size);
	faWriteNext(faFile, fb->name, seq->dna, seq->size);
	freeDnaSeq(&seq);
	}
    }
featureBitsFreeList(&fbList);
}
void runSamples(char *goodFile, char *badFile, char *newDb, char *oldDb, int numToRun)
/* run a bunch of tests */
{
int i,j,k;
FILE *good = mustOpen(goodFile, "w");
FILE *bad = mustOpen(badFile, "w");
char *tmp = NULL;
int numGood=0, numBad=0, tooManyNs=0;
boolean success = FALSE;
struct dnaSeq *seq = NULL;
printf("Running Tests\t");
for(i=0;i<numToRun;i++)
    {
    struct coordConvRep *ccr = NULL;
    struct coordConv *cc = NULL;
    if(!(i%10)) putTic();
    cc = getRandomCoord(oldDb);
    seq = hDnaFromSeq(cc->chrom, cc->chromStart, cc->chromEnd, dnaLower);
    if(!(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn")))
	{
	chrom = cc->chrom;
	chromStart = cc->chromStart;
	chromEnd = cc->chromEnd;
	success = convertCoordinates(good, bad, printReport, printReport);
	if(success)
	    numGood++;
	else
	    numBad++;
	}
    else 
	{
	tooManyNs++;
	}
    freeDnaSeq(&seq);
    coordConvFree(&cc);
    }
carefulClose(&good);
carefulClose(&bad);
printf("\tDone.\n");
printf("Out of %d attempts got %d 'succesfully converted' and %d 'had problems', %d had too many N's\n", 
       (numGood + numBad), numGood, numBad, tooManyNs);
printf("After checking got %d of %d correctly called and %d incorrectly called.\n", 
       hgTestCorrect, hgTestCorrect+hgTestWrong, hgTestWrong);
}
Пример #7
0
void motifFinder(char *database, char *name, int fileCount, char *files[])
/* motifFinder - find largest scoring motif in bed items. */
{
struct sqlConnection *conn = sqlConnect(database);
int fileNum;
char where[256];
struct chromInfo *ci  = createChromInfoList(NULL, database);
sqlSafefFrag(where, sizeof(where), "name = '%s'", name);
struct dnaMotif *motif = dnaMotifLoadWhere(conn, motifTable, where);
if(markovTable != NULL)
    dnaMotifMakeLog2(motif);
if(motif == NULL)
    errAbort("couldn't find motif '%s'", name);
for (fileNum = 0; fileNum < fileCount; fileNum++)
    {
    char *words[64], *line;
    char **row;
    struct lineFile *lf = lineFileOpen(files[fileNum], TRUE);
    while (lineFileNextReal(lf, &line))
        {
	int dnaLength, i, j, rowOffset, length, wordCount = chopTabs(line, words);
        unsigned chromSize;
        boolean markovFound = FALSE;
        double mark0[5];
        double mark2[5][5][5];
        struct dnaSeq *seq = NULL;
        char *dupe = NULL;
        if (0 == wordCount)
            continue;
        lineFileExpectAtLeast(lf, 3, wordCount);
        dupe = cloneString(line);
        char *chrom = words[0];
        int chromStart = lineFileNeedNum(lf, words, 1);
        if(markovTable != NULL)
            chromStart = max(2, chromStart);
        unsigned chromEnd = lineFileNeedNum(lf, words, 2);
        if (chromEnd < 1)
            errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n",
		     lf->lineIx, dupe);
        if (chromStart > chromEnd)
            errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n",
                     lf->lineIx, dupe, chromStart, chromEnd);
        length = chromEnd - chromStart;
        chromSize = getChromSize(ci, chrom);
        if(markovTable == NULL)
            {
            dnaLength = length;
            seq = hDnaFromSeq(database, chrom, chromStart, chromEnd, dnaUpper);
            if(uniformBackground)
                {
                int i;
                mark0[0] = 1;
                for(i = 1; i <= 4; i++)
                    mark0[i] = 0.25;
                }
            else
                {
                dnaMark0(seq, mark0, NULL);
                }
            }
        else
            {
            dnaLength = length + 4;
            if(chromStart - 2 + dnaLength > chromSize)
                // can't do analysis for potential peak hanging off the end of the chrom
                continue;
            seq = hDnaFromSeq(database, chrom, chromStart - 2, chromEnd + 2, dnaUpper);
            struct sqlResult *sr = hRangeQuery(conn, markovTable, chrom, chromStart,
                                               chromStart + 1, NULL, &rowOffset);
            if((row = sqlNextRow(sr)) != NULL)
                {
                dnaMark2Deserialize(row[rowOffset + 3], mark2);
                dnaMarkMakeLog2(mark2);
                markovFound = TRUE;
                }
            else
                errAbort("markov table '%s' is missing; non-markov analysis is current not supported", markovTable);
            sqlFreeResult(&sr);
            }
        struct bed6FloatScore *hits = NULL;
        for (i = 0; i < 2; i++)
            {
            double mark0Copy[5];
            char strand = i == 0 ? '+' : '-';
            for (j = 0; j <= 4; j++)
                mark0Copy[j] = mark0[j];
            if(strand == '-')
                {
                // reverse markov table too!
                double tmp;
                reverseComplement(seq->dna, dnaLength);
                tmp = mark0Copy[1];
                mark0Copy[1] = mark0Copy[3];
                mark0Copy[3] = tmp;
                tmp = mark0Copy[2];
                mark0Copy[2] = mark0Copy[4];
                mark0Copy[4] = tmp;
                }
            for (j = 0; j < length - motif->columnCount + 1; j++)
                // tricky b/c if(markovFound) then seq includes the two bytes on either side of actual sequence.
                {
                double score;
                if(markovFound)
                    score = dnaMotifBitScoreWithMarkovBg(motif, seq->dna + j, mark2);
                else
                    score = dnaMotifBitScoreWithMark0Bg(motif, seq->dna + j, mark0Copy);
                if(score >= minScoreCutoff)
                    {
                    int start;
                    if(strand == '-')
                        start = (chromEnd - j) - motif->columnCount;
                    else
                        start = chromStart + j;
                    struct bed6FloatScore *hit = NULL;

                    // Watch out for overlapping hits (on either strand; yes, I've seen that happen);
                    // we report only the highest scoring hit in this case.
                    // O(n^2) where n == number of motifs in a peak, but I expect n to be almost always very small.
                    if(!originalCoordinates)
                        {
                        for (hit = hits; hit != NULL; hit = hit->next)
                            {
                            if(hit->chromEnd > start && hit->chromStart <= (start + motif->columnCount))
                                {
                                verbose(3, "found overlapping hits: %d-%d overlaps with %d-%d\n", start, start + motif->columnCount, hit->chromStart, hit->chromEnd);
                                break;
                                }
                            }
                        }
                    if(hit == NULL || hit->score < score)
                        {
                        if(hit == NULL)
                            {
                            AllocVar(hit);
                            slAddHead(&hits, hit);
                            hit->chrom = cloneString(chrom);
                            }
                        hit->chromStart = originalCoordinates ? chromStart : start;
                        hit->chromEnd = originalCoordinates ? chromEnd : start + motif->columnCount;
                        hit->score = score;
                        hit->strand[0] = strand;
                        }
                    }
                verbose(3, "j: %d; score: %.2f\n", j, score);
                }
            }
        slSort(&hits, bed6FloatCmpDesc);
        int count;
        float currentPrior = prior;
        for(count = 1; hits != NULL; count++, hits = hits->next)
            {
            if(topOnly && count > topOnly)
                break;
            // Use a progressively weaker prior for hits with lower scores
            verbose(3, "count: %d; score: %.2f; prior: %.2f; log2(prior / (1 - prior)): %.2f\n", count, hits->score, currentPrior, log2(currentPrior / (1 - currentPrior)));
            if(hits->score >= minScoreCutoff - log2(currentPrior / (1 - currentPrior)))
                {
                printf("%s\t%d\t%d\t%s\t%.2f\t%c\n", chrom, originalCoordinates ? chromStart : hits->chromStart, 
                       originalCoordinates ? chromEnd : hits->chromStart + motif->columnCount, name, hits->score, hits->strand[0]);
                currentPrior = count == 1 ? priorBackoff : currentPrior * priorBackoff;
                if(count > 2)
                    verbose(3, "hit for count: %d at %s:%d-%d\n", count, chrom, hits->chromStart, hits->chromStart + motif->columnCount);
                }
            else
                break;
            }
        freeDnaSeq(&seq);
        freeMem(dupe);
        }
    lineFileClose(&lf);
    }
sqlDisconnect(&conn);
}
Пример #8
0
void intronEnds(char *database, char *table)
/* intronEnds - Gather stats on intron ends.. */
{
struct dyString *query = newDyString(1024);
struct sqlConnection *conn;
struct sqlResult *sr;
char **row;
struct genePred *gp;
int total = 0;
int gtag = 0;
int gcag = 0;
int atac = 0;
int ctac = 0;
DNA ends[4];
int exonIx, txStart;
struct dnaSeq *seq;
int rowOffset;
char strand;

rowOffset = hOffsetPastBin(database, NULL, table);
conn = hAllocConn(database);
sqlDyStringPrintf(query, "select * from %s", table);
if (chromName != NULL)
    dyStringPrintf(query, " where chrom = '%s'", chromName);
if (cgiBoolean("withUtr"))
    {
    dyStringPrintf(query, " %s txStart != cdsStart", 
        (chromName == NULL ? "where" : "and"));
    }
sr = sqlGetResult(conn, query->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    gp = genePredLoad(row+rowOffset);
    strand = gp->strand[0];
    txStart = gp->txStart;
    seq = hDnaFromSeq(database, gp->chrom, txStart, gp->txEnd, dnaLower);
    for (exonIx=1; exonIx < gp->exonCount; ++exonIx)
        {
	++total;
	memcpy(ends, seq->dna + gp->exonEnds[exonIx-1] - txStart, 2);
	memcpy(ends+2, seq->dna + gp->exonStarts[exonIx] - txStart - 2, 2);
	if (strand == '-')
	    reverseComplement(ends, 4);
	if (ends[0] == 'g' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'g')
	   ++gtag;
	if (ends[0] == 'g' && ends[1] == 'c' && ends[2] == 'a' && ends[3] == 'g')
	   ++gcag;
	if (ends[0] == 'a' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
	   ++atac;
	if (ends[0] == 'c' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
	   ++ctac;
	}
    freeDnaSeq(&seq);
    genePredFree(&gp);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
printf("gt/ag %d (%4.2f)\n", gtag, 100.0*gtag/total);
printf("gc/ag %d (%4.2f)\n", gcag, 100.0*gcag/total);
printf("at/ac %d (%4.2f)\n", atac, 100.0*atac/total);
printf("ct/ac %d (%4.2f)\n", ctac, 100.0*ctac/total);
printf("Total %d\n", total);
}
static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name,
                                 int rCount, unsigned *rStarts, unsigned *rSizes,
                                 boolean *exonFlags, boolean *cdsFlags)
/* Concatenate and print out dna for a series of regions. */
{
// Note: this code use to generate different sequence ids if the global
// database in hdb was different than the db parameter.  This functionality
// has been removed since the global database was removed and it didn't
// appear to be used.

    struct dnaSeq *rSeq = NULL;
    struct dnaSeq *cSeq = NULL;
    char recName[256];
    int seqStart, seqEnd;
    int offset, cSize;
    int i;
    boolean isRc     = (strand == '-') || cgiBoolean("hgSeq.revComp");
    boolean maskRep  = cgiBoolean("hgSeq.maskRepeats");
    int padding5     = cgiOptionalInt("hgSeq.padding5", 0);
    int padding3     = cgiOptionalInt("hgSeq.padding3", 0);
    char *casing     = cgiString("hgSeq.casing");
    char *repMasking = cgiString("hgSeq.repMasking");
    char *granularity  = cgiOptionalString("hgSeq.granularity");
    boolean concatRegions = granularity && sameString("gene", granularity);

    if (rCount < 1)
        return;

    /* Don't support padding if granularity is gene (i.e. concat'ing all). */
    if (concatRegions)
    {
        padding5 = padding3 = 0;
    }

    i = rCount - 1;
    seqStart = rStarts[0]             - (isRc ? padding3 : padding5);
    seqEnd   = rStarts[i] + rSizes[i] + (isRc ? padding5 : padding3);
    /* Padding might push us off the edge of the chrom; if so, truncate: */
    if (seqStart < 0)
    {
        if (isRc)
            padding3 += seqStart;
        else
            padding5 += seqStart;
        seqStart = 0;
    }

    /* if we know the chromSize, don't pad out beyond it */
    if ((chromSize > 0) && (seqEnd > chromSize))
    {
        if (isRc)
            padding5 += (chromSize - seqEnd);
        else
            padding3 += (chromSize - seqEnd);
        seqEnd = chromSize;
    }
    if (seqEnd <= seqStart)
    {
        printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n",
               db,
               name,
               chrom, seqStart+1, seqEnd,
               padding5, padding3);
        return;
    }
    if (maskRep)
    {
        rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed);
        if (sameString(repMasking, "N"))
            lowerToN(rSeq->dna, strlen(rSeq->dna));
        if (!sameString(casing, "upper"))
            tolowers(rSeq->dna);
    }
    else if (sameString(casing, "upper"))
        rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaUpper);
    else
        rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaLower);

    /* Handle casing and compute size of concatenated sequence */
    cSize = 0;
    for (i=0;  i < rCount;  i++)
    {
        if ((sameString(casing, "exon") && exonFlags[i]) ||
                (sameString(casing, "cds") && cdsFlags[i]))
        {
            int rStart = rStarts[i] - seqStart;
            toUpperN(rSeq->dna+rStart, rSizes[i]);
        }
        cSize += rSizes[i];
    }
    cSize += (padding5 + padding3);
    AllocVar(cSeq);
    cSeq->dna = needLargeMem(cSize+1);
    cSeq->size = cSize;

    offset = 0;
    for (i=0;  i < rCount;  i++)
    {
        int start = rStarts[i] - seqStart;
        int size  = rSizes[i];
        if (i == 0)
        {
            start -= (isRc ? padding3 : padding5);
            assert(start == 0);
            size  += (isRc ? padding3 : padding5);
        }
        if (i == rCount-1)
        {
            size  += (isRc ? padding5 : padding3);
        }
        memcpy(cSeq->dna+offset, rSeq->dna+start, size);
        offset += size;
    }
    assert(offset == cSeq->size);
    cSeq->dna[offset] = 0;
    freeDnaSeq(&rSeq);

    if (isRc)
        reverseComplement(cSeq->dna, cSeq->size);

    safef(recName, sizeof(recName),
          "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d "
          "strand=%c repeatMasking=%s",
          db,
          name,
          chrom, seqStart+1, seqEnd,
          padding5, padding3,
          (isRc ? '-' : '+'),
          (maskRep ? repMasking : "none"));
    faWriteNext(stdout, recName, cSeq->dna, cSeq->size);
    freeDnaSeq(&cSeq);
}
Пример #10
0
void doClusterMotifDetails(struct sqlConnection *conn, struct trackDb *tdb, 
                                struct factorSource *cluster)
/* Display details about TF binding motif(s) in cluster */
{
char *motifTable = trackDbSetting(tdb, "motifTable");         // localizations
char *motifPwmTable = trackDbSetting(tdb, "motifPwmTable");   // PWM used to draw sequence logo
char *motifMapTable = trackDbSetting(tdb, "motifMapTable");   // map target to motif
struct slName *motifNames = NULL, *mn; // list of canonical motifs for the factor
struct dnaMotif *motif = NULL;
struct bed6FloatScore *hit = NULL, *maxHit = NULL;
char **row;
char query[256];

if (motifTable != NULL && sqlTableExists(conn, motifTable))
    {
    struct sqlResult *sr;
    int rowOffset;
    char where[256];

    if (motifMapTable == NULL || !sqlTableExists(conn, motifMapTable))
        {
        // Assume cluster name is motif name if there is no map table
        motifNames = slNameNew(cluster->name);
        }
    else
        {
        sqlSafef(query, sizeof(query),
                "select motif from %s where target = '%s'", motifMapTable, cluster->name);
        char *ret = sqlQuickString(conn, query);
        if (ret == NULL)
            {
            // missing target from table -- no canonical motif
            webNewEmptySection();
            return;
            }
        motifNames = slNameListFromString(ret, ',');
        }
    for (mn = motifNames; mn != NULL; mn = mn->next)
        {
        sqlSafefFrag(where, sizeof(where), "name='%s' order by score desc limit 1", mn->name);
        sr = hRangeQuery(conn, motifTable, cluster->chrom, cluster->chromStart,
                     cluster->chromEnd, where, &rowOffset);
        if ((row = sqlNextRow(sr)) != NULL)
            {
            hit = bed6FloatScoreLoad(row + rowOffset);
            if (maxHit == NULL || maxHit->score < hit->score)
                maxHit = hit;
            }
        sqlFreeResult(&sr);
        }
    }
if (maxHit == NULL)
    {
    // Maintain table layout
    webNewEmptySection();
    return;
    }
hit = maxHit;

webNewSection("Canonical Motif in Cluster");
char posLink[1024];
safef(posLink, sizeof(posLink),"<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</a>",
        hgTracksPathAndSettings(), database, 
            cluster->chrom, hit->chromStart+1, hit->chromEnd,
            cluster->chrom, hit->chromStart+1, hit->chromEnd);
printf("<b>Motif Name:</b>  %s<br>\n", hit->name);
printf("<b>Motif Score");
printf(":</b>  %.2f<br>\n", hit->score);
printf("<b>Motif Position:</b> %s<br>\n", posLink);
printf("<b>Motif Strand:</b> %c<br>\n", (int)hit->strand[0]);

struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower);
if (seq == NULL)
    return;
if (hit->strand[0] == '-')
    reverseComplement(seq->dna, seq->size);
if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable))
    {
    motif = loadDnaMotif(hit->name, motifPwmTable);
    if (motif == NULL)
        return;
    motifLogoAndMatrix(&seq, 1, motif);
    }
}
Пример #11
0
void processVariant(char *database, char *proteinID, int aaSeqLen, int varStart, int varLen, 
		    char *origSeq,  char *varSeq, char *varId, FILE *outf)
{
char query[256];
struct sqlResult *sr;
char **row;
struct sqlConnection  *conn;

char *qNameStr;
char *qSizeStr;
char *qStartStr;
char *qEndStr;
char *tNameStr=NULL;
char *tSizeStr;
char *tStartStr;
char *tEndStr;
char *blockCountStr;
char *blockSizesStr;
char *qStartsStr;
char *tStartsStr;

char *strand       = NULL;
int blockCount=0;
char *exonStartStr = NULL;
int exonGenomeStartPos, exonGenomeEndPos;
char *exonGenomeStartStr = NULL;
char *chp, *chp2, *chp3;
char *exonSizeStr  = NULL;
int j;

int exonStart, exonEnd;
int lastStart;
int lastLen;
int accumLen;
int exonLen;
int dvStart;
int varEnd;
struct dnaSeq *dnaseq;

conn= hAllocConn();
/* NOTE: the query below may not always return single answer, */
/* and kgProtMap and knownGene alignments may not be identical, so pick the closest one. */

safef(query,sizeof(query), "select qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, blockCount, blockSizes, qStarts, tStarts, strand from %s.%s where qName='%s';",
        database, "kgProtMap", proteinID);
sr  = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);

if (row == NULL)
    {
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    return;
    }

while (row != NULL)
    {
    qNameStr        = cloneString(row[0]);
    qSizeStr        = cloneString(row[1]);
    qStartStr       = cloneString(row[2]);
    qEndStr         = cloneString(row[3]);
    tNameStr        = cloneString(row[4]);
    tSizeStr        = cloneString(row[5]);
    tStartStr       = cloneString(row[6]);
    tEndStr         = cloneString(row[7]);
    blockCountStr   = cloneString(row[8]);
    blockSizesStr   = cloneString(row[9]);
    qStartsStr      = cloneString(row[10]);
    tStartsStr      = cloneString(row[11]);
    strand          = cloneString(row[12]);

    blockCount 	   = atoi(blockCountStr);

if (sameWord(strand, "+"))
    {
    exonStartStr   = qStartsStr;
    exonGenomeStartStr = tStartsStr;
    exonSizeStr    = blockSizesStr;
    
    accumLen = 0;
    lastStart = 0;
    chp2 = exonGenomeStartStr;
    chp3 = blockSizesStr;
    for (j=0; j< blockCount; j++)
    	{
	chp = strstr(chp2, ",");
	*chp = '\0';
	exonStart = atoi(chp2);
	chp2 = chp+1;
	
	chp = strstr(chp3, ",");
	*chp = '\0';
	exonLen = atoi(chp3);
	chp3 = chp + 1;
	
	if (((accumLen + exonLen)/3) >= varStart)
	    {
	    dvStart = exonStart + (varStart - accumLen/3)*3;
	    np++;
	    dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); 
	    
	    /* check if first AA of variant agrees with genomic codon */
	    if (lookupCodon(dnaseq->dna) == origSeq[0]) 
		{
		np0++;
	        fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId);
		}
	    else
		{
	        /* try next position */
		dvStart = exonStart + (varStart - accumLen/3)*3 - 1;
		
		dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); 
		if (lookupCodon(dnaseq->dna) == origSeq[0]) 
			{
			np1++;
	        	fprintf(outf, "%s\t%d\t%d\t%s\n", 
				tNameStr, dvStart, dvStart+varLen*3, varId);
			}
		else
			{
			/* could further scan wider range for valid AA mapping */
			}
     		}
	    break;
	    }
	    
	lastStart = exonStart;
	accumLen = accumLen + exonLen;
	}
    }
else
    {
    exonStartStr   = qStartsStr;
    exonGenomeStartStr = tStartsStr;
    exonSizeStr    = blockSizesStr;

    varEnd = aaSeqLen - varStart - varLen;
    
    accumLen = 0;
    lastStart = 0;
    chp2 = exonGenomeStartStr;
    chp3 = blockSizesStr;
    for (j=0; j< blockCount; j++)
    	{
	chp = strstr(chp2, ",");
	*chp = '\0';
	exonStart = atoi(chp2);
	chp2 = chp+1;
	
	chp = strstr(chp3, ",");
	*chp = '\0';
	exonLen = atoi(chp3);
	chp3 = chp + 1;

	if (((accumLen + exonLen)/3) >= varEnd)
	    {
	    nn++;
	    dvStart =  exonStart + (varEnd - accumLen/3)*3;
	
	    dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); 
	    reverseComplement(dnaseq->dna, (long)3);
  	    if (lookupCodon(dnaseq->dna) == origSeq[0])
            	{
		nn0++;
	    	fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId);
	    	}
	    else
	    	{
	    	dvStart =  exonStart + (varEnd - accumLen/3)*3 - 1;
	
	    	dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); 
	    	reverseComplement(dnaseq->dna, (long)3);
  	    	if (lookupCodon(dnaseq->dna) == origSeq[0])
            	    {
		    nn1++;

	    	    fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId);
		    }
		else
		    {
		    /* deal with this later */
		    }
		}
	    
	    fflush(outf);
	    break;
	    }
	    
	lastStart = exonStart;
	accumLen = accumLen + exonLen;
	}
    }

    row = sqlNextRow(sr);
    }

sqlFreeResult(&sr);
hFreeConn(&conn);
}