void doTriangle(struct trackDb *tdb, char *item, char *motifTable) /* Display detailed info on a regulatory triangle item. */ { int start = cartInt(cart, "o"); struct dnaSeq *seq = NULL; struct dnaMotif *motif = loadDnaMotif(item, motifTable); char *table = tdb->table; int rowOffset = hOffsetPastBin(database, seqName, table); char query[256]; struct sqlResult *sr; char **row; struct bed *hit = NULL; struct sqlConnection *conn = hAllocConn(database); cartWebStart(cart, database, "Regulatory Motif Info"); genericBedClick(conn, tdb, item, start, 6); sqlSafef(query, sizeof query, "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", table, item, seqName, start); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) hit = bedLoadN(row + rowOffset, 6); sqlFreeResult(&sr); if (hit != NULL) { seq = hDnaFromSeq(database, hit->chrom, hit->chromStart, hit->chromEnd, dnaLower); if (hit->strand[0] == '-') reverseComplement(seq->dna, seq->size); } motifHitSection(seq, motif); printTrackHtml(tdb); }
void doTransRegCode(struct trackDb *tdb, char *item, char *motifTable) /* Display detailed info on a transcriptional regulatory code item. */ { struct dnaMotif *motif = loadDnaMotif(item, motifTable); int start = cartInt(cart, "o"); struct dnaSeq *seq = NULL; char *table = tdb->table; int rowOffset = hOffsetPastBin(database, seqName, table); char query[256]; struct sqlResult *sr; char **row; struct sqlConnection *conn = hAllocConn(database); struct transRegCode *trc = NULL; cartWebStart(cart, database, "Regulatory Code Info"); sqlSafef(query, sizeof query, "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", table, item, seqName, start); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) trc = transRegCodeLoad(row+rowOffset); sqlFreeResult(&sr); if (trc != NULL) { char strand[2]; seq = hDnaFromSeq(database, trc->chrom, trc->chromStart, trc->chromEnd, dnaLower); if (seq->size != motif->columnCount) { printf("WARNING: seq->size = %d, motif->colCount=%d<BR>\n", seq->size, motif->columnCount); strand[0] = '?'; seq = NULL; } else { strand[0] = dnaMotifBestStrand(motif, seq->dna); if (strand[0] == '-') reverseComplement(seq->dna, seq->size); } strand[1] = 0; printf("<B>Name:</B> "); sacCerHgGeneLinkName(conn, trc->name); printf("<BR>\n"); printf("<B>ChIP-chip Evidence:</B> %s<BR>\n", trc->chipEvidence); printf("<B>Species conserved in:</B> %d of 2<BR>\n", trc->consSpecies); if (seq != NULL) printf("<B>Bit Score of Motif Hit:</B> %4.2f<BR>\n", dnaMotifBitScore(motif, seq->dna)); printf("<B>Item score:</B> %d<BR>\n", trc->score); printPosOnChrom(trc->chrom, trc->chromStart, trc->chromEnd, strand, TRUE, trc->name); } motifHitSection(seq, motif); printTrackHtml(tdb); }
struct dnaSeq *dnaFromChrom(char *db, char *chrom, int chromStart, int chromEnd, enum dnaCase seqCase)
/** Return the dna for the chromosome region [chromStart, chromEnd).
 * When a nib file is already open (chromNib != NULL) the region is read
 * from it with nibLdPart(), and db, chrom and seqCase are not consulted.
 * Otherwise the request is forwarded to hDnaFromSeq(). */
{
if (chromNib == NULL)
    return hDnaFromSeq(db, chrom, chromStart, chromEnd, seqCase);

int regionSize = chromEnd - chromStart;
return (struct dnaSeq *)nibLdPart(chromNib, chromNibFile, chromNibSize, chromStart, regionSize);
}
static double motifScoreHere(char *chrom, int start, int end, char *motifName, char *motifTable)
/* Return the bit score of the named motif against the genomic sequence at
 * chrom:start-end, scored on whichever strand dnaMotifBestStrand() picks.
 * Both the sequence and the motif are loaded and freed inside this call. */
{
struct dnaSeq *region = hDnaFromSeq(database, chrom, start, end, dnaLower);
struct dnaMotif *pwm = loadDnaMotif(motifName, motifTable);
double bits;

/* Orient the sequence to the better-scoring strand before scoring. */
if (dnaMotifBestStrand(pwm, region->dna) == '-')
    reverseComplement(region->dna, region->size);
bits = dnaMotifBitScore(pwm, region->dna);

dnaMotifFree(&pwm);
dnaSeqFree(&region);
return bits;
}
void chromFeatureSeq(struct sqlConnection *conn, char *database, char *chrom, char *trackSpec, FILE *bedFile, FILE *faFile, int *retItemCount, int *retBaseCount) /* Write out sequence file for features from one chromosome. * This separate routine handles the non-merged case. It's * reason for being is so that the feature names get preserved. */ { boolean hasBin; char t[512], *s = NULL; char table[HDB_MAX_TABLE_STRING]; struct featureBits *fbList = NULL, *fb; if (trackSpec[0] == '!') errAbort("Sorry, '!' not available with fa output unless you use faMerge"); isolateTrackPartOfSpec(trackSpec, t); s = strchr(t, '.'); if (s != NULL) errAbort("Sorry, only database (not file) tracks allowed with " "fa output unless you use faMerge"); // ignore isSplit return from hFindSplitTable() (void) hFindSplitTable(database, chrom, t, table, &hasBin); fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom), where, TRUE, TRUE); for (fb = fbList; fb != NULL; fb = fb->next) { int s = fb->start, e = fb->end; if (bedFile != NULL) { fprintf(bedFile, "%s\t%d\t%d\t%s", fb->chrom, fb->start, fb->end, fb->name); if (fb->strand != '?') fprintf(bedFile, "\t0\t%c", fb->strand); fprintf(bedFile, "\n"); } if (faFile != NULL) { struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower); if (fb->strand == '-') reverseComplement(seq->dna, seq->size); faWriteNext(faFile, fb->name, seq->dna, seq->size); freeDnaSeq(&seq); } } featureBitsFreeList(&fbList); }
void runSamples(char *goodFile, char *badFile, char *newDb, char *oldDb, int numToRun) /* run a bunch of tests */ { int i,j,k; FILE *good = mustOpen(goodFile, "w"); FILE *bad = mustOpen(badFile, "w"); char *tmp = NULL; int numGood=0, numBad=0, tooManyNs=0; boolean success = FALSE; struct dnaSeq *seq = NULL; printf("Running Tests\t"); for(i=0;i<numToRun;i++) { struct coordConvRep *ccr = NULL; struct coordConv *cc = NULL; if(!(i%10)) putTic(); cc = getRandomCoord(oldDb); seq = hDnaFromSeq(cc->chrom, cc->chromStart, cc->chromEnd, dnaLower); if(!(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"))) { chrom = cc->chrom; chromStart = cc->chromStart; chromEnd = cc->chromEnd; success = convertCoordinates(good, bad, printReport, printReport); if(success) numGood++; else numBad++; } else { tooManyNs++; } freeDnaSeq(&seq); coordConvFree(&cc); } carefulClose(&good); carefulClose(&bad); printf("\tDone.\n"); printf("Out of %d attempts got %d 'succesfully converted' and %d 'had problems', %d had too many N's\n", (numGood + numBad), numGood, numBad, tooManyNs); printf("After checking got %d of %d correctly called and %d incorrectly called.\n", hgTestCorrect, hgTestCorrect+hgTestWrong, hgTestWrong); }
void motifFinder(char *database, char *name, int fileCount, char *files[]) /* motifFinder - find largest scoring motif in bed items. */ { struct sqlConnection *conn = sqlConnect(database); int fileNum; char where[256]; struct chromInfo *ci = createChromInfoList(NULL, database); sqlSafefFrag(where, sizeof(where), "name = '%s'", name); struct dnaMotif *motif = dnaMotifLoadWhere(conn, motifTable, where); if(markovTable != NULL) dnaMotifMakeLog2(motif); if(motif == NULL) errAbort("couldn't find motif '%s'", name); for (fileNum = 0; fileNum < fileCount; fileNum++) { char *words[64], *line; char **row; struct lineFile *lf = lineFileOpen(files[fileNum], TRUE); while (lineFileNextReal(lf, &line)) { int dnaLength, i, j, rowOffset, length, wordCount = chopTabs(line, words); unsigned chromSize; boolean markovFound = FALSE; double mark0[5]; double mark2[5][5][5]; struct dnaSeq *seq = NULL; char *dupe = NULL; if (0 == wordCount) continue; lineFileExpectAtLeast(lf, 3, wordCount); dupe = cloneString(line); char *chrom = words[0]; int chromStart = lineFileNeedNum(lf, words, 1); if(markovTable != NULL) chromStart = max(2, chromStart); unsigned chromEnd = lineFileNeedNum(lf, words, 2); if (chromEnd < 1) errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n", lf->lineIx, dupe); if (chromStart > chromEnd) errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n", lf->lineIx, dupe, chromStart, chromEnd); length = chromEnd - chromStart; chromSize = getChromSize(ci, chrom); if(markovTable == NULL) { dnaLength = length; seq = hDnaFromSeq(database, chrom, chromStart, chromEnd, dnaUpper); if(uniformBackground) { int i; mark0[0] = 1; for(i = 1; i <= 4; i++) mark0[i] = 0.25; } else { dnaMark0(seq, mark0, NULL); } } else { dnaLength = length + 4; if(chromStart - 2 + dnaLength > chromSize) // can't do analysis for potential peak hanging off the end of the chrom continue; seq = hDnaFromSeq(database, chrom, chromStart - 2, chromEnd + 2, dnaUpper); struct sqlResult *sr = 
hRangeQuery(conn, markovTable, chrom, chromStart, chromStart + 1, NULL, &rowOffset); if((row = sqlNextRow(sr)) != NULL) { dnaMark2Deserialize(row[rowOffset + 3], mark2); dnaMarkMakeLog2(mark2); markovFound = TRUE; } else errAbort("markov table '%s' is missing; non-markov analysis is current not supported", markovTable); sqlFreeResult(&sr); } struct bed6FloatScore *hits = NULL; for (i = 0; i < 2; i++) { double mark0Copy[5]; char strand = i == 0 ? '+' : '-'; for (j = 0; j <= 4; j++) mark0Copy[j] = mark0[j]; if(strand == '-') { // reverse markov table too! double tmp; reverseComplement(seq->dna, dnaLength); tmp = mark0Copy[1]; mark0Copy[1] = mark0Copy[3]; mark0Copy[3] = tmp; tmp = mark0Copy[2]; mark0Copy[2] = mark0Copy[4]; mark0Copy[4] = tmp; } for (j = 0; j < length - motif->columnCount + 1; j++) // tricky b/c if(markovFound) then seq includes the two bytes on either side of actual sequence. { double score; if(markovFound) score = dnaMotifBitScoreWithMarkovBg(motif, seq->dna + j, mark2); else score = dnaMotifBitScoreWithMark0Bg(motif, seq->dna + j, mark0Copy); if(score >= minScoreCutoff) { int start; if(strand == '-') start = (chromEnd - j) - motif->columnCount; else start = chromStart + j; struct bed6FloatScore *hit = NULL; // Watch out for overlapping hits (on either strand; yes, I've seen that happen); // we report only the highest scoring hit in this case. // O(n^2) where n == number of motifs in a peak, but I expect n to be almost always very small. if(!originalCoordinates) { for (hit = hits; hit != NULL; hit = hit->next) { if(hit->chromEnd > start && hit->chromStart <= (start + motif->columnCount)) { verbose(3, "found overlapping hits: %d-%d overlaps with %d-%d\n", start, start + motif->columnCount, hit->chromStart, hit->chromEnd); break; } } } if(hit == NULL || hit->score < score) { if(hit == NULL) { AllocVar(hit); slAddHead(&hits, hit); hit->chrom = cloneString(chrom); } hit->chromStart = originalCoordinates ? 
chromStart : start; hit->chromEnd = originalCoordinates ? chromEnd : start + motif->columnCount; hit->score = score; hit->strand[0] = strand; } } verbose(3, "j: %d; score: %.2f\n", j, score); } } slSort(&hits, bed6FloatCmpDesc); int count; float currentPrior = prior; for(count = 1; hits != NULL; count++, hits = hits->next) { if(topOnly && count > topOnly) break; // Use a progressively weaker prior for hits with lower scores verbose(3, "count: %d; score: %.2f; prior: %.2f; log2(prior / (1 - prior)): %.2f\n", count, hits->score, currentPrior, log2(currentPrior / (1 - currentPrior))); if(hits->score >= minScoreCutoff - log2(currentPrior / (1 - currentPrior))) { printf("%s\t%d\t%d\t%s\t%.2f\t%c\n", chrom, originalCoordinates ? chromStart : hits->chromStart, originalCoordinates ? chromEnd : hits->chromStart + motif->columnCount, name, hits->score, hits->strand[0]); currentPrior = count == 1 ? priorBackoff : currentPrior * priorBackoff; if(count > 2) verbose(3, "hit for count: %d at %s:%d-%d\n", count, chrom, hits->chromStart, hits->chromStart + motif->columnCount); } else break; } freeDnaSeq(&seq); freeMem(dupe); } lineFileClose(&lf); } sqlDisconnect(&conn); }
void intronEnds(char *database, char *table) /* intronEnds - Gather stats on intron ends.. */ { struct dyString *query = newDyString(1024); struct sqlConnection *conn; struct sqlResult *sr; char **row; struct genePred *gp; int total = 0; int gtag = 0; int gcag = 0; int atac = 0; int ctac = 0; DNA ends[4]; int exonIx, txStart; struct dnaSeq *seq; int rowOffset; char strand; rowOffset = hOffsetPastBin(database, NULL, table); conn = hAllocConn(database); sqlDyStringPrintf(query, "select * from %s", table); if (chromName != NULL) dyStringPrintf(query, " where chrom = '%s'", chromName); if (cgiBoolean("withUtr")) { dyStringPrintf(query, " %s txStart != cdsStart", (chromName == NULL ? "where" : "and")); } sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { gp = genePredLoad(row+rowOffset); strand = gp->strand[0]; txStart = gp->txStart; seq = hDnaFromSeq(database, gp->chrom, txStart, gp->txEnd, dnaLower); for (exonIx=1; exonIx < gp->exonCount; ++exonIx) { ++total; memcpy(ends, seq->dna + gp->exonEnds[exonIx-1] - txStart, 2); memcpy(ends+2, seq->dna + gp->exonStarts[exonIx] - txStart - 2, 2); if (strand == '-') reverseComplement(ends, 4); if (ends[0] == 'g' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'g') ++gtag; if (ends[0] == 'g' && ends[1] == 'c' && ends[2] == 'a' && ends[3] == 'g') ++gcag; if (ends[0] == 'a' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c') ++atac; if (ends[0] == 'c' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c') ++ctac; } freeDnaSeq(&seq); genePredFree(&gp); } sqlFreeResult(&sr); hFreeConn(&conn); printf("gt/ag %d (%4.2f)\n", gtag, 100.0*gtag/total); printf("gc/ag %d (%4.2f)\n", gcag, 100.0*gcag/total); printf("at/ac %d (%4.2f)\n", atac, 100.0*atac/total); printf("ct/ac %d (%4.2f)\n", ctac, 100.0*ctac/total); printf("Total %d\n", total); }
static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name, int rCount, unsigned *rStarts, unsigned *rSizes, boolean *exonFlags, boolean *cdsFlags)
/* Concatenate and print out dna for a series of regions.
 *   db, chrom   - where to fetch sequence from
 *   chromSize   - if positive, padding is clipped so it never runs past it
 *   strand      - '-' forces reverse-complement output
 *   name        - item name used in the fasta record name
 *   rCount      - number of regions; nothing is done when < 1
 *   rStarts, rSizes       - start and size of each region
 *   exonFlags, cdsFlags   - per-region flags consulted when the casing
 *                           option is "exon" or "cds" respectively
 * Output options come from the hgSeq.* CGI variables read below.  One
 * fasta record is written to stdout, or a "# Null range" comment line if
 * the requested range is empty. */
{
// Note: this code use to generate different sequence ids if the global
// database in hdb was different than the db parameter.  This functionality
// has been removed since the global database was removed and it didn't
// appear to be used.
boolean isRc = (strand == '-') || cgiBoolean("hgSeq.revComp");
boolean maskRep = cgiBoolean("hgSeq.maskRepeats");
int padding5 = cgiOptionalInt("hgSeq.padding5", 0);
int padding3 = cgiOptionalInt("hgSeq.padding3", 0);
char *casing = cgiString("hgSeq.casing");
char *repMasking = cgiString("hgSeq.repMasking");
char *granularity = cgiOptionalString("hgSeq.granularity");
boolean concatRegions = granularity && sameString("gene", granularity);
/* On the reverse strand the 3' padding sits at the low-coordinate end. */
int *leadPad = isRc ? &padding3 : &padding5;
int *trailPad = isRc ? &padding5 : &padding3;
struct dnaSeq *regionSeq = NULL;
struct dnaSeq *outSeq = NULL;
char recName[256];
int seqStart, seqEnd;
int outPos, outSize;
int last, i;

if (rCount < 1)
    return;

/* Don't support padding if granularity is gene (i.e. concat'ing all). */
if (concatRegions)
    padding5 = padding3 = 0;

last = rCount - 1;
seqStart = rStarts[0] - *leadPad;
seqEnd = rStarts[last] + rSizes[last] + *trailPad;

/* Padding might push us off the edge of the chrom; if so, truncate: */
if (seqStart < 0)
    {
    *leadPad += seqStart;
    seqStart = 0;
    }
/* if we know the chromSize, don't pad out beyond it */
if ((chromSize > 0) && (seqEnd > chromSize))
    {
    *trailPad += (chromSize - seqEnd);
    seqEnd = chromSize;
    }
if (seqEnd <= seqStart)
    {
    printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n",
        db, name, chrom, seqStart+1, seqEnd, padding5, padding3);
    return;
    }

/* Fetch the whole span once, in the case the options call for. */
if (maskRep)
    {
    regionSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed);
    if (sameString(repMasking, "N"))
        lowerToN(regionSeq->dna, strlen(regionSeq->dna));
    if (!sameString(casing, "upper"))
        tolowers(regionSeq->dna);
    }
else
    {
    enum dnaCase wanted = sameString(casing, "upper") ? dnaUpper : dnaLower;
    regionSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, wanted);
    }

/* Handle casing and compute size of concatenated sequence */
boolean upperExons = sameString(casing, "exon");
boolean upperCds = sameString(casing, "cds");
outSize = 0;
for (i = 0; i < rCount; i++)
    {
    if ((upperExons && exonFlags[i]) || (upperCds && cdsFlags[i]))
        {
        int relStart = rStarts[i] - seqStart;
        toUpperN(regionSeq->dna + relStart, rSizes[i]);
        }
    outSize += rSizes[i];
    }
outSize += (padding5 + padding3);

AllocVar(outSeq);
outSeq->dna = needLargeMem(outSize+1);
outSeq->size = outSize;

/* Copy each region; the first and last also carry their padding. */
outPos = 0;
for (i = 0; i < rCount; i++)
    {
    int from = rStarts[i] - seqStart;
    int len = rSizes[i];
    if (i == 0)
        {
        from -= *leadPad;
        assert(from == 0);
        len += *leadPad;
        }
    if (i == last)
        len += *trailPad;
    memcpy(outSeq->dna + outPos, regionSeq->dna + from, len);
    outPos += len;
    }
assert(outPos == outSeq->size);
outSeq->dna[outPos] = 0;
freeDnaSeq(&regionSeq);

if (isRc)
    reverseComplement(outSeq->dna, outSeq->size);

safef(recName, sizeof(recName),
    "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d "
    "strand=%c repeatMasking=%s",
    db, name, chrom, seqStart+1, seqEnd, padding5, padding3,
    (isRc ? '-' : '+'),
    (maskRep ? repMasking : "none"));
faWriteNext(stdout, recName, outSeq->dna, outSeq->size);
freeDnaSeq(&outSeq);
}
void doClusterMotifDetails(struct sqlConnection *conn, struct trackDb *tdb, struct factorSource *cluster) /* Display details about TF binding motif(s) in cluster */ { char *motifTable = trackDbSetting(tdb, "motifTable"); // localizations char *motifPwmTable = trackDbSetting(tdb, "motifPwmTable"); // PWM used to draw sequence logo char *motifMapTable = trackDbSetting(tdb, "motifMapTable"); // map target to motif struct slName *motifNames = NULL, *mn; // list of canonical motifs for the factor struct dnaMotif *motif = NULL; struct bed6FloatScore *hit = NULL, *maxHit = NULL; char **row; char query[256]; if (motifTable != NULL && sqlTableExists(conn, motifTable)) { struct sqlResult *sr; int rowOffset; char where[256]; if (motifMapTable == NULL || !sqlTableExists(conn, motifMapTable)) { // Assume cluster name is motif name if there is no map table motifNames = slNameNew(cluster->name); } else { sqlSafef(query, sizeof(query), "select motif from %s where target = '%s'", motifMapTable, cluster->name); char *ret = sqlQuickString(conn, query); if (ret == NULL) { // missing target from table -- no canonical motif webNewEmptySection(); return; } motifNames = slNameListFromString(ret, ','); } for (mn = motifNames; mn != NULL; mn = mn->next) { sqlSafefFrag(where, sizeof(where), "name='%s' order by score desc limit 1", mn->name); sr = hRangeQuery(conn, motifTable, cluster->chrom, cluster->chromStart, cluster->chromEnd, where, &rowOffset); if ((row = sqlNextRow(sr)) != NULL) { hit = bed6FloatScoreLoad(row + rowOffset); if (maxHit == NULL || maxHit->score < hit->score) maxHit = hit; } sqlFreeResult(&sr); } } if (maxHit == NULL) { // Maintain table layout webNewEmptySection(); return; } hit = maxHit; webNewSection("Canonical Motif in Cluster"); char posLink[1024]; safef(posLink, sizeof(posLink),"<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</a>", hgTracksPathAndSettings(), database, cluster->chrom, hit->chromStart+1, hit->chromEnd, cluster->chrom, hit->chromStart+1, 
hit->chromEnd); printf("<b>Motif Name:</b> %s<br>\n", hit->name); printf("<b>Motif Score"); printf(":</b> %.2f<br>\n", hit->score); printf("<b>Motif Position:</b> %s<br>\n", posLink); printf("<b>Motif Strand:</b> %c<br>\n", (int)hit->strand[0]); struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower); if (seq == NULL) return; if (hit->strand[0] == '-') reverseComplement(seq->dna, seq->size); if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable)) { motif = loadDnaMotif(hit->name, motifPwmTable); if (motif == NULL) return; motifLogoAndMatrix(&seq, 1, motif); } }
void processVariant(char *database, char *proteinID, int aaSeqLen, int varStart, int varLen, char *origSeq, char *varSeq, char *varId, FILE *outf) { char query[256]; struct sqlResult *sr; char **row; struct sqlConnection *conn; char *qNameStr; char *qSizeStr; char *qStartStr; char *qEndStr; char *tNameStr=NULL; char *tSizeStr; char *tStartStr; char *tEndStr; char *blockCountStr; char *blockSizesStr; char *qStartsStr; char *tStartsStr; char *strand = NULL; int blockCount=0; char *exonStartStr = NULL; int exonGenomeStartPos, exonGenomeEndPos; char *exonGenomeStartStr = NULL; char *chp, *chp2, *chp3; char *exonSizeStr = NULL; int j; int exonStart, exonEnd; int lastStart; int lastLen; int accumLen; int exonLen; int dvStart; int varEnd; struct dnaSeq *dnaseq; conn= hAllocConn(); /* NOTE: the query below may not always return single answer, */ /* and kgProtMap and knownGene alignments may not be identical, so pick the closest one. */ safef(query,sizeof(query), "select qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, blockCount, blockSizes, qStarts, tStarts, strand from %s.%s where qName='%s';", database, "kgProtMap", proteinID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row == NULL) { sqlFreeResult(&sr); hFreeConn(&conn); return; } while (row != NULL) { qNameStr = cloneString(row[0]); qSizeStr = cloneString(row[1]); qStartStr = cloneString(row[2]); qEndStr = cloneString(row[3]); tNameStr = cloneString(row[4]); tSizeStr = cloneString(row[5]); tStartStr = cloneString(row[6]); tEndStr = cloneString(row[7]); blockCountStr = cloneString(row[8]); blockSizesStr = cloneString(row[9]); qStartsStr = cloneString(row[10]); tStartsStr = cloneString(row[11]); strand = cloneString(row[12]); blockCount = atoi(blockCountStr); if (sameWord(strand, "+")) { exonStartStr = qStartsStr; exonGenomeStartStr = tStartsStr; exonSizeStr = blockSizesStr; accumLen = 0; lastStart = 0; chp2 = exonGenomeStartStr; chp3 = blockSizesStr; for (j=0; j< blockCount; j++) { chp = 
strstr(chp2, ","); *chp = '\0'; exonStart = atoi(chp2); chp2 = chp+1; chp = strstr(chp3, ","); *chp = '\0'; exonLen = atoi(chp3); chp3 = chp + 1; if (((accumLen + exonLen)/3) >= varStart) { dvStart = exonStart + (varStart - accumLen/3)*3; np++; dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); /* check if first AA of variant agrees with genomic codon */ if (lookupCodon(dnaseq->dna) == origSeq[0]) { np0++; fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId); } else { /* try next position */ dvStart = exonStart + (varStart - accumLen/3)*3 - 1; dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); if (lookupCodon(dnaseq->dna) == origSeq[0]) { np1++; fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId); } else { /* could further scan wider range for valid AA mapping */ } } break; } lastStart = exonStart; accumLen = accumLen + exonLen; } } else { exonStartStr = qStartsStr; exonGenomeStartStr = tStartsStr; exonSizeStr = blockSizesStr; varEnd = aaSeqLen - varStart - varLen; accumLen = 0; lastStart = 0; chp2 = exonGenomeStartStr; chp3 = blockSizesStr; for (j=0; j< blockCount; j++) { chp = strstr(chp2, ","); *chp = '\0'; exonStart = atoi(chp2); chp2 = chp+1; chp = strstr(chp3, ","); *chp = '\0'; exonLen = atoi(chp3); chp3 = chp + 1; if (((accumLen + exonLen)/3) >= varEnd) { nn++; dvStart = exonStart + (varEnd - accumLen/3)*3; dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); reverseComplement(dnaseq->dna, (long)3); if (lookupCodon(dnaseq->dna) == origSeq[0]) { nn0++; fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId); } else { dvStart = exonStart + (varEnd - accumLen/3)*3 - 1; dnaseq = hDnaFromSeq(tNameStr, dvStart, dvStart+varLen*3, dnaUpper); reverseComplement(dnaseq->dna, (long)3); if (lookupCodon(dnaseq->dna) == origSeq[0]) { nn1++; fprintf(outf, "%s\t%d\t%d\t%s\n", tNameStr, dvStart, dvStart+varLen*3, varId); } else { /* deal with this 
later */ } } fflush(outf); break; } lastStart = exonStart; accumLen = accumLen + exonLen; } } row = sqlNextRow(sr); } sqlFreeResult(&sr); hFreeConn(&conn); }