int main(int argc, char *argv[]) { char *database; char *outFn; struct dnaSeq *seq; struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; if (argc != 4) usage(); database = argv[1]; conn2= hAllocConn(database); outFn = argv[2]; outf = mustOpen(outFn, "w"); tgtChrom = argv[3]; sqlSafef(query2, sizeof query2, "select secStr, name, chrom, chromStart, chromEnd, strand from evofold where chrom='%s'", tgtChrom); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { secStr = row2[0]; id = row2[1]; chrom = row2[2]; chromStart = atoi(row2[3]); chromEnd = atoi(row2[4]); strand = *row2[5]; seq = hChromSeq(database, chrom, chromStart, chromEnd); touppers(seq->dna); if (strand == '-') reverseComplement(seq->dna, seq->size); memSwapChar(seq->dna, seq->size, 'T', 'U'); safef(javaCmd, sizeof(javaCmd), "java -cp VARNAv3-7.jar fr.orsay.lri.varna.applications.VARNAcmd -sequenceDBN %s -structureDBN '%s' -o evoFold/%s/%s.png", seq->dna, secStr, chrom, id); fprintf(outf, "%s\n", javaCmd); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); fclose(outf); hFreeConn(&conn2); return(0); }
static void singleBamDetails(const bam1_t *bam) /* Print out the properties of this alignment. */ { const bam1_core_t *core = &bam->core; char *itemName = bam1_qname(bam); int tLength = bamGetTargetLength(bam); int tStart = core->pos, tEnd = tStart+tLength; boolean isRc = useStrand && bamIsRc(bam); printPosOnChrom(seqName, tStart, tEnd, NULL, FALSE, itemName); if (!skipQualityScore) printf("<B>Alignment Quality: </B>%d<BR>\n", core->qual); printf("<B>CIGAR string: </B><tt>%s</tt> (", bamGetCigar(bam)); bamShowCigarEnglish(bam); printf(")<BR>\n"); printf("<B>Tags:</B>"); bamShowTags(bam); puts("<BR>"); printf("<B>Flags: </B><tt>0x%02x:</tt><BR>\n ", core->flag); bamShowFlagsEnglish(bam); puts("<BR>"); if (bamIsRc(bam)) printf("<em>Note: although the read was mapped to the reverse strand of the genome, " "the sequence and CIGAR in BAM are relative to the forward strand.</em><BR>\n"); puts("<BR>"); struct dnaSeq *genoSeq = hChromSeq(database, seqName, tStart, tEnd); char *qSeq = bamGetQuerySequence(bam, FALSE); if (isNotEmpty(qSeq) && !sameString(qSeq, "*")) { char *qSeq = NULL; struct ffAli *ffa = bamToFfAli(bam, genoSeq, tStart, useStrand, &qSeq); printf("<B>Alignment of %s to %s:%d-%d%s:</B><BR>\n", itemName, seqName, tStart+1, tEnd, (isRc ? " (reverse complemented)" : "")); ffShowSideBySide(stdout, ffa, qSeq, 0, genoSeq->dna, tStart, tLength, 0, tLength, 8, isRc, FALSE); } if (!skipQualityScore && core->l_qseq > 0) { printf("<B>Sequence quality scores:</B><BR>\n<TT><TABLE><TR>\n"); UBYTE *quals = bamGetQueryQuals(bam, useStrand); int i; for (i = 0; i < core->l_qseq; i++) { if (i > 0 && (i % 24) == 0) printf("</TR>\n<TR>"); printf("<TD>%c<BR>%d</TD>", qSeq[i], quals[i]); } printf("</TR></TABLE></TT>\n"); } }
struct cassetteSeq *cassetteSeqFromBed(struct bed *bed, int targetExon) /* Consruct a cassetteSeq from a bed using the targetExon. */ { struct cassetteSeq *cseq = NULL; int i=0; char buff[1024]; int bedSize=0; int seqSize=0; int targetStart=0; /* Make sure the target exon is valid. */ if(targetExon >= bed->blockCount) errAbort("pickCassettePcrPrimers::cassetteSeqFromBed() - Got request" "for %d targetExon, but only %d exons present in %s\n", targetExon, bed->blockCount, bed->name); AllocVar(cseq); AllocVar(cseq->seq); snprintf(buff, sizeof(buff), "%s:%d-%d_%s", bed->chrom, bed->chromStart, bed->chromEnd, bed->name); cseq->name = cloneString(buff); bedSize = countBedSize(bed) + 1; cseq->seq->dna = needMem(sizeof(char)*bedSize); cseq->bed = cloneBed(bed); for(i=0; i<bed->blockCount; i++) { struct dnaSeq *seq = NULL; int chromStart = bed->chromStarts[i] + bed->chromStart; int chromEnd = bed->blockSizes[i] + chromStart; seq = hChromSeq(bed->chrom, chromStart, chromEnd); sprintf(cseq->seq->dna+seqSize, "%s", seq->dna); if(targetExon == i) targetStart = seqSize; seqSize += bed->blockSizes[i]; dnaSeqFree(&seq); } cseq->seq->size = seqSize; if(sameString(bed->strand, "-")) { reverseComplement(cseq->seq->dna, cseq->seq->size); targetStart = cseq->seq->size - targetStart - bed->blockSizes[targetExon]; } cseq->targetStart = targetStart; cseq->targetEnd = targetStart + bed->blockSizes[targetExon]; snprintf(cseq->strand, sizeof(cseq->strand), "%s", bed->strand); return cseq; }
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted,
		       FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon.
 *   bedList    - bed record(s) written to bedOutFile when that is non-NULL.
 *   ag, eIx    - graph and the index of the cassette edge within it.
 *   outputted  - in/out flag: when it points at FALSE the graph is written to
 *                stdout and the flag is set, so the graph is emitted only
 *                once.  May be NULL, in which case nothing is written to
 *                stdout.
 *   outfile    - optional fasta output for the edge's sequence.
 *   html, conf - passed through to writeBrowserLink. */
{
int i = eIx;
if(bedOutFile != NULL)
    bedTabOutN(bedList,12, bedOutFile);
writeBrowserLink(html, ag, conf, i);
/* Test the flag the pointer refers to, not the pointer itself: the old
 * test (!outputted) was never true for a valid pointer and dereferenced
 * NULL when it was true. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }
if(outfile != NULL)
    {
    struct dnaSeq *seq = hChromSeq(ag->tName, ag->vPositions[ag->edgeStarts[i]], ag->vPositions[ag->edgeEnds[i]]);
    /* NOTE(review): the sequence is reverse complemented for '+' strand
     * graphs, which looks inverted -- confirm this is intended. */
    if(sameString(ag->strand , "+"))
	reverseComplement(seq->dna, seq->size);
    /* Only edges shorter than 200 bases are written to the fasta file. */
    if(seq->size < 200)
	faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}
void doDnaTrack(char *chrom, char strand, int exonCount, int len, int *yOffp) /* draw track for AA residue */ { int xx, yy; int j; int mrnaLen; int exonStartPos, exonEndPos; int exonGenomeStartPos, exonGenomeEndPos; int exonNumber; int printedExonNumber = -1; Color exonColor[2]; Color color; int k; struct dnaSeq *dna; char base[2]; char baseComp[2]; int dnaLen; Color defaultColor; defaultColor = vgFindColorIx(g_vg, 170, 170, 170); /* exonColor[0] = pbBlue; */ exonColor[0] = vgFindColorIx(g_vg, 0x00, 0x00, 0xd0); exonColor[1] = vgFindColorIx(g_vg, 0, 180, 0); base[1] = '\0'; baseComp[1] = '\0'; currentYoffset = *yOffp; calxy(0, *yOffp, &xx, &yy); /* The hypothetical mRNA length is 3 times of aaLen */ mrnaLen = len * 3; exonNumber = 1; exonStartPos = blockStartPositive[exonNumber-1]; exonEndPos = blockEndPositive[exonNumber-1]; exonGenomeStartPos = blockGenomeStartPositive[exonNumber-1]; exonGenomeEndPos = blockGenomeEndPositive[exonNumber-1]; dna = hChromSeq(database, chrom, exonGenomeStartPos, exonGenomeEndPos+1); dnaLen = strlen(dna->dna); k=0; for (j = 0; j < mrnaLen; j++) { if (j > exonEndPos) { if (printedExonNumber != exonNumber) { printedExonNumber = exonNumber; } if (exonNumber < exonCount) { exonNumber++; exonStartPos = blockStartPositive[exonNumber-1]; exonEndPos = blockEndPositive[exonNumber-1]; exonGenomeStartPos = blockGenomeStartPositive[exonNumber-1]; exonGenomeEndPos = blockGenomeEndPositive[exonNumber-1]; dna = hChromSeq(database, chrom, exonGenomeStartPos, exonGenomeEndPos+1); dnaLen = strlen(dna->dna); k=0; } } if ((j >= exonStartPos) && (j <= exonEndPos)) { if (strand == '+') { base[0] = toupper(*(dna->dna + k)); } else { base[0] = toupper(ntCompTable[(int)*(dna->dna + dnaLen - k -1 )]); baseComp[0] = toupper(*(dna->dna + dnaLen - k -1 )); } k++; color = exonColor[(exonNumber-1) % 2]; calxy(j/3, *yOffp, &xx, &yy); if (strand == '-') { vgTextRight(g_vg, xx-3+(j%3)*6, yy-3, 10, 10, color, g_font, base); vgTextRight(g_vg, xx-3+(j%3)*6, yy+9, 10, 10, 
color, g_font, baseComp); } else { vgTextRight(g_vg, xx-3+(j%3)*6, yy-3, 10, 10, color, g_font, base); } } color = pbBlue; } calxy0(0, *yOffp, &xx, &yy); vgBox(g_vg, 0, yy-10, xx, 30, bkgColor); if (strand == '-') { trackTitle = cloneString("Coding Sequence"); } else { trackTitle = cloneString("Genomic Sequence"); } vgTextRight(g_vg, xx-25, yy-4, 10, 10, MG_BLACK, g_font, trackTitle); trackTitleLen = strlen(trackTitle); mapBoxTrackTitle(xx-25-trackTitleLen*6, yy-6, trackTitleLen*6+12, 14, trackTitle, "dna"); if (strand == '-') { trackTitle = cloneString("Genomic Sequence"); vgTextRight(g_vg, xx-25, yy+9, 10, 10, MG_BLACK, g_font, trackTitle); trackTitleLen = strlen(trackTitle); mapBoxTrackTitle(xx-25-trackTitleLen*6, yy+7, trackTitleLen*6+12, 14, trackTitle, "dna"); } if (strand == '-') { *yOffp = *yOffp + 20; } else { *yOffp = *yOffp + 12; } }
void checkBedMatchesSeqs(struct cassetteSeq *cseq, struct bed *bed) /* Pull the bed sequences out of the database and make sure that they match the primer sequences in cseq. Second sequence in bed is always reverse complemented, matching sequences flip depeneding on which strand we're on: If gene is on '+' strand ++++++++++++++++++++++++++++++++++++++++++++-> <-------------------------------------------- llllllllll-> <-rrrrrrrrrr (rev-comp) If gene is on '-' strand ++++++++++++++++++++++++++++++++++++++++++++-> <-------------------------------------------- rrrrrrrrrr-> <-llllllllll (rev-comp) */ { struct dnaSeq *firstSeq = NULL; struct dnaSeq *secSeq = NULL; boolean goodFlag = TRUE; firstSeq = hChromSeq(bed->chrom, bed->chromStart, bed->chromStart+bed->blockSizes[0]); secSeq = hChromSeq(bed->chrom, bed->chromStart+bed->chromStarts[1], bed->chromStart + bed->chromStarts[1] + bed->blockSizes[1]); if(sameString(bed->strand,"+")) { reverseComplement(secSeq->dna, secSeq->size); if(differentString(firstSeq->dna, cseq->leftPrimer)) goodFlag = FALSE; if(differentString(secSeq->dna, cseq->rightPrimer)) goodFlag = FALSE; reverseComplement(secSeq->dna, secSeq->size); } else { reverseComplement(secSeq->dna, secSeq->size); if(differentString(firstSeq->dna, cseq->rightPrimer)) goodFlag = FALSE; if(differentString(secSeq->dna, cseq->leftPrimer)) goodFlag = FALSE; reverseComplement(secSeq->dna, secSeq->size); } if(goodFlag == FALSE) { char *rpRev = cloneString(cseq->rightPrimer); char *lpRev = cloneString(cseq->leftPrimer); char *firstBlock = cloneString(firstSeq->dna); char *secBlock = cloneString(secSeq->dna); reverseComplement(rpRev, strlen(rpRev)); reverseComplement(lpRev, strlen(lpRev)); reverseComplement(firstBlock, strlen(firstBlock)); reverseComplement(secBlock, strlen(secBlock)); warn("Problem for bed; %s on strand %s", bed->name, bed->strand); warn("leftPrimer:\t%s\t%s", cseq->leftPrimer, lpRev); warn("rightPrimer:\t%s\t%s", cseq->rightPrimer, rpRev); 
warn("firstBlock:\t%s\t%s", firstSeq->dna, firstBlock); warn("secBlock:\t%s\t%s", secSeq->dna, secBlock); freez(&rpRev); freez(&lpRev); freez(&firstBlock); freez(&secBlock); } dnaSeqFree(&firstSeq); dnaSeqFree(&secSeq); }
struct mafAli *hgMafFrag( char *database, /* Database, must already have hSetDb to this */ char *track, /* Name of MAF track */ char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand, /* Chromosome strand. */ char *outName, /* Optional name to use in first component */ struct slName *orderList /* Optional order of organisms. */ ) /* mafFrag- Extract maf sequences for a region from database. * This creates a somewhat unusual MAF that extends from start * to end whether or not there are actually alignments. Where * there are no alignments (or alignments missing a species) * a . character fills in. The score is always zero, and * the sources just indicate the species. You can mafFree this * as normal. */ { int chromSize = hChromSize(database, chrom); struct sqlConnection *conn = hAllocConn(database); struct dnaSeq *native = hChromSeq(database, chrom, start, end); struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end); char masterSrc[128]; struct hash *orgHash = newHash(10); struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL; int curPos = start, symCount = 0; struct slName *name; int order = 0; /* Check that the mafs are really copacetic, the particular * subtype we think is in the database that this (relatively) * simple code can handle. */ safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom); mafCheckFirstComponentSrc(mafList, masterSrc); mafCheckFirstComponentStrand(mafList, '+'); slSort(&mafList, mafCmp); /* Prebuild organisms if possible from input orderList. 
*/ for (name = orderList; name != NULL; name = name->next) { AllocVar(org); slAddHead(&orgList, org); hashAddSaveName(orgHash, name->name, org, &org->name); org->dy = dyStringNew(native->size*1.5); org->order = order++; if (nativeOrg == NULL) nativeOrg = org; } if (orderList == NULL) { AllocVar(org); slAddHead(&orgList, org); hashAddSaveName(orgHash, database, org, &org->name); org->dy = dyStringNew(native->size*1.5); if (nativeOrg == NULL) nativeOrg = org; } /* Go through all mafs in window, mostly building up * org->dy strings. */ for (maf = mafList; maf != NULL; maf = maf->next) { struct mafComp *mc, *mcMaster = maf->components; struct mafAli *subMaf = NULL; order = 0; if (curPos < mcMaster->start) { fillInMissing(nativeOrg, orgList, native, start, curPos, mcMaster->start); symCount += mcMaster->start - curPos; } if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst * backtracking */ { if (mafNeedSubset(maf, masterSrc, curPos, end)) { subMaf = mafSubset(maf, masterSrc, curPos, end); if (subMaf == NULL) continue; } else subMaf = maf; for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order) { /* Extract name up to dot into 'orgName' */ char buf[128], *e, *orgName; if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */ continue; mc->leftStatus = mc->rightStatus = 0; /* squash annotation */ e = strchr(mc->src, '.'); if (e == NULL) orgName = mc->src; else { int len = e - mc->src; if (len >= sizeof(buf)) errAbort("organism/database name %s too long", mc->src); memcpy(buf, mc->src, len); buf[len] = 0; orgName = buf; } /* Look up dyString corresponding to org, and create a * new one if necessary. 
*/ org = hashFindVal(orgHash, orgName); if (org == NULL) { if (orderList != NULL) errAbort("%s is not in orderList", orgName); AllocVar(org); slAddHead(&orgList, org); hashAddSaveName(orgHash, orgName, org, &org->name); org->dy = dyStringNew(native->size*1.5); dyStringAppendMultiC(org->dy, '.', symCount); if (nativeOrg == NULL) nativeOrg = org; } if (orderList == NULL && order > org->order) org->order = order; org->hit = TRUE; /* Fill it up with alignment. */ dyStringAppendN(org->dy, mc->text, subMaf->textSize); } for (org = orgList; org != NULL; org = org->next) { if (!org->hit) dyStringAppendMultiC(org->dy, '.', subMaf->textSize); org->hit = FALSE; } symCount += subMaf->textSize; curPos = mcMaster->start + mcMaster->size; if (subMaf != maf) mafAliFree(&subMaf); } } if (curPos < end) { fillInMissing(nativeOrg, orgList, native, start, curPos, end); symCount += end - curPos; } mafAliFreeList(&mafList); slSort(&orgList, oneOrgCmp); if (strand == '-') { for (org = orgList; org != NULL; org = org->next) reverseComplement(org->dy->string, org->dy->stringSize); } /* Construct our maf */ AllocVar(maf); maf->textSize = symCount; for (org = orgList; org != NULL; org = org->next) { struct mafComp *mc; AllocVar(mc); if (org == orgList) { if (outName != NULL) { mc->src = cloneString(outName); mc->srcSize = native->size; mc->strand = '+'; mc->start = 0; mc->size = native->size; } else { mc->src = cloneString(masterSrc); mc->srcSize = chromSize; mc->strand = strand; if (strand == '-') reverseIntRange(&start, &end, chromSize); mc->start = start; mc->size = end-start; } } else { int size = countAlpha(org->dy->string); mc->src = cloneString(org->name); mc->srcSize = size; mc->strand = '+'; mc->start = 0; mc->size = size; } mc->text = cloneString(org->dy->string); dyStringFree(&org->dy); slAddHead(&maf->components, mc); } slReverse(&maf->components); slFreeList(&orgList); freeHash(&orgHash); hFreeConn(&conn); return maf; }