static void prMiscDiff(struct gbMiscDiff *gmd, unsigned miscDiffFlds) /* print any gbMiscDiff row */ { webPrintLinkTableNewRow(); webPrintIntCell(gmd->mrnaStart); webPrintIntCell(gmd->mrnaEnd); if (miscDiffFlds & gbMiscDiffGene) webPrintLinkCell(gmd->gene); if (miscDiffFlds & gbMiscDiffReplace) webPrintLinkCell(gmd->replacement); if (miscDiffFlds & gbMiscDiffNotes) webPrintLinkCell(gmd->notes); }
void correlateGraphs(struct genoGraph *aGg, struct genoGraph *bGg) /* Do correlation between two graphs */ { double r = chromGraphBinCorrelate(aGg->binFileName, bGg->binFileName); webPrintLinkCell(aGg->shortLabel); webPrintLinkCell(bGg->shortLabel); webPrintLinkCellStart(); hPrintf("%f", r); webPrintLinkCellEnd(); webPrintLinkCellStart(); hPrintf("%f", r*r); webPrintLinkCellEnd(); }
static void printControlledVocabFields(char **row, int fieldCount, struct slName *fieldList, char *vocabFile, struct hash *vocabHash) /* Print out fields from row, linking them to controlled vocab if need be. * If vocabFile is NULL, the vocabHash is not ENCODE, so links + mouseovers are * generated differently */ { int i; struct slName *field; for (i=0, field = fieldList; i<fieldCount; ++i, field = field->next) { char *link = NULL; char *fieldVal = row[i]; if (vocabFile && hashLookup(vocabHash, field->name)) { // controlled vocabulary link = wgEncodeVocabLink(vocabFile, "term", fieldVal, fieldVal, fieldVal, ""); } else if (!vocabFile && vocabHash != NULL) { // meta tables struct hash *fieldHash = hashFindVal(vocabHash, field->name); if (fieldHash != NULL) link = vocabLink(fieldHash, fieldVal, fieldVal); } webPrintLinkCell(link != NULL ? link : fieldVal); } }
static void prAlign(struct sqlConnection *conn, char *pslTbl, struct psl *psl) /* print an alignment */ { // genomic location webPrintLinkCellStart(); printf("<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</A>", hgTracksPathAndSettings(), database, psl->tName, psl->tStart+1, psl->tEnd, psl->tName, psl->tStart+1, psl->tEnd); webPrintLinkCellEnd(); // genomic span webPrintLinkCellRightStart(); printf("%d", psl->tEnd-psl->tStart); webPrintLinkCellEnd(); // strand webPrintLinkCell(psl->strand); // mRNA location, linked to aligment viewer webPrintLinkCellStart(); char other[128]; safef(other, sizeof(other), "%d&aliTable=%s", psl->tStart, pslTbl); hgcAnchorSomewhere("htcCdnaAli", psl->qName, other, psl->tName); printf("%s:%d-%d</A>", psl->qName, psl->qStart+1, psl->qEnd); webPrintLinkCellEnd(); // identity webPrintLinkCellRightStart(); printf("%.2f%%", 100.0 * pslIdent(psl)); webPrintLinkCellEnd(); // fraction aligned webPrintLinkCellRightStart(); int aligned = psl->match + psl->misMatch + psl->repMatch; printf("%.2f%%", 100.0*aligned/((float)psl->qSize)); webPrintLinkCellEnd(); }
static void rnaStructurePrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print out rnaStructure table. */ { static boolean firstTime = TRUE; static char *names[2] = {"5' UTR", "3' UTR"}; static char *tables[2] = {"foldUtr5", "foldUtr3"}; int side; if (firstTime) { rnaTrashDirsInit(tables, ArraySize(tables)); firstTime = FALSE; } webPrintLinkTableStart(); webPrintLabelCell("Region"); webPrintLabelCell("Fold Energy"); webPrintLabelCell("Bases"); webPrintLabelCell("Energy/Base"); webPrintWideCenteredLabelCell("Display As", 3); for (side = 0; side < ArraySize(names); ++side) { char *table = tables[side]; struct sqlResult *sr; char query[256], **row; safef(query, sizeof(query), "select * from %s where name = '%s'", table, geneId); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { struct rnaFold fold; int bases; char psName[128]; /* Load fold and save it as postScript. */ rnaFoldStaticLoad(row, &fold); safef(psName, sizeof(psName), "../trash/%s/%s_%s.ps", table, table, geneId); if (!fileExists(psName)) { FILE *f; f = popen("../cgi-bin/RNAplot", "w"); if (f != NULL) { fprintf(f, ">%s\n", psName); /* This tells where to put file. */ fprintf(f, "%s\n%s\n", fold.seq, fold.fold); pclose(f); } } /* Print row of table, starting with energy terms . */ hPrintf("</TR><TR>"); bases = strlen(fold.seq); webPrintLinkCell(names[side]); webPrintLinkCellStart(); hPrintf("%1.2f", fold.energy); webPrintLinkCellEnd(); webPrintLinkCellStart(); hPrintf("%d", bases); webPrintLinkCellEnd(); webPrintLinkCellStart(); hPrintf("%1.3f", fold.energy/bases); webPrintLinkCellEnd(); /* Print link to png image. */ webPrintLinkCellStart(); hPrintf("<A HREF=\"%s?%s&%s=%s&%s=%s&%s=%s\" class=\"toc\" TARGET=_blank>", geneCgi, cartSidUrlString(cart), hggMrnaFoldRegion, table, hggMrnaFoldPs, psName, hggDoRnaFoldDisplay, "picture"); hPrintf(" Picture "); hPrintf("</A>"); webPrintLinkCellEnd(); /* Print link to PostScript. */ webPrintLinkCellStart(); hPrintf("<A HREF=\"%s\" class=\"toc\">", psName); hPrintf(" PostScript "); hPrintf("</A>"); webPrintLinkCellEnd(); /* Print link to text. */ webPrintLinkCellStart(); hPrintf("<A HREF=\"%s?%s&%s=%s&%s=%s\" class=\"toc\" TARGET=_blank>", geneCgi, cartSidUrlString(cart), hggMrnaFoldRegion, table, hggDoRnaFoldDisplay, "text"); hPrintf(" Text "); hPrintf("</A>"); webPrintLinkCellEnd(); } sqlFreeResult(&sr); } webPrintLinkTableEnd(); hPrintf("<BR>The RNAfold program from the "); hPrintf("<A HREF=\"http://www.tbi.univie.ac.at/~ivo/RNA/\" TARGET=_blank>"); hPrintf("Vienna RNA Package</A> is used to perform the "); hPrintf("secondary structure predictions and folding calculations. "); hPrintf("The estimated folding energy is in kcal/mol. The more "); hPrintf("negative the energy, the more secondary structure the RNA "); hPrintf("is likely to have."); }
static void prCellLabelVal(char *label, char *val) /* print label and value as adjacent cells */ { webPrintLabelCell(label); webPrintLinkCell(val); }
static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn, char *sourceTable, char *inputTrackTable, struct slName *fieldList, boolean invert, char *vocab) /* Put out a lines in an html table that shows assayed sources that have hits in this * cluster, or if invert is set, that have misses. */ { char *vocabFile = NULL; if (vocab) { vocabFile = cloneFirstWord(vocab); } /* Make the monster SQL query to get all assays*/ struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, inputTrackTable); struct slName *field; for (field = fieldList; field != NULL; field = field->next) sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name); sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable); sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable); boolean encodeStanford = FALSE; if (startsWith("encode3", sourceTable) || startsWith("encode4", sourceTable)) encodeStanford = TRUE; int displayNo = 0; int fieldCount = slCount(fieldList); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { int sourceId = sqlUnsigned(row[0]); boolean hit = FALSE; int i; double signal = 0.0; for (i=0; i<cluster->expCount; i++) { if (cluster->expNums[i] == sourceId) { hit = TRUE; signal = cluster->expScores[i]; break; } } if (hit ^ invert) { printf("</TR><TR>\n"); webPrintIntCell(++displayNo); if (!invert) webPrintDoubleCell(signal); webPrintLinkCell(row[1]); int i = 0; // find position of CV metadata in field list int offset = 3; struct slName *field = fieldList; for (i=0; i<fieldCount && field != NULL; ++i, field = field->next) { char *fieldVal = row[i+offset]; if (vocab) { char *link = cloneString(factorSourceVocabLink(vocabFile, field->name, fieldVal)); webPrintLinkCell(link); } else webPrintLinkCell(fieldVal); } char *table = row[2]; if (encodeStanford) { char *file = stringIn("ENCFF", table); if (!file) webPrintLinkCell(table); else { webPrintLinkCellStart(); printf("<A target='_blank'" "href='https://www.encodeproject.org/files/%s'>%s</A>", file, file); webPrintLinkCellEnd(); } } else printMetadataForTable(table); } } sqlFreeResult(&sr); freez(&vocabFile); dyStringFree(&query); }
void showCdsEvidence(char *geneName, struct trackDb *tdb, char *evTable) /* Print out stuff from cdsEvidence table. */ { struct sqlConnection *conn = hAllocConn(database); double bestScore = 0; if (sqlTableExists(conn, evTable)) { webNewSection("CDS Prediction Information"); char query[512]; sqlSafef(query, sizeof(query), "select count(*) from %s where name='%s'", evTable, geneName); if (sqlQuickNum(conn, query) > 0) { sqlSafef(query, sizeof(query), "select * from %s where name='%s' order by score desc", evTable, geneName); struct sqlResult *sr = sqlGetResult(conn, query); char **row; webPrintLinkTableStart(); webPrintLabelCell("ORF<BR>size"); webPrintLabelCell("start in<BR>transcript"); webPrintLabelCell("end in<BR>transcript"); webPrintLabelCell("source"); webPrintLabelCell("accession"); webPrintLabelCell("ad-hoc<BR>score"); webPrintLabelCell("start<BR>codon"); webPrintLabelCell("end<BR>codon"); webPrintLabelCell("piece<BR>count"); webPrintLabelCell("piece list"); webPrintLabelCell("frame"); webPrintLinkTableNewRow(); while ((row = sqlNextRow(sr)) != NULL) { struct cdsEvidence *ev = cdsEvidenceLoad(row); webPrintIntCell(ev->end - ev->start); int i; webPrintIntCell(ev->start+1); webPrintIntCell(ev->end); webPrintLinkCell(ev->source); webPrintLinkCell(ev->accession); webPrintLinkCellRightStart(); printf("%3.2f", ev->score); bestScore = max(ev->score, bestScore); webPrintLinkCellEnd(); webPrintLinkCell(ev->startComplete ? "yes" : "no"); webPrintLinkCell(ev->endComplete ? "yes" : "no"); webPrintIntCell(ev->cdsCount); webPrintLinkCellRightStart(); for (i=0; i<ev->cdsCount; ++i) { int start = ev->cdsStarts[i]; int end = start + ev->cdsSizes[i]; printf("%d-%d ", start+1, end); } webPrintLinkCellEnd(); webPrintLinkCellRightStart(); for (i=0; i<ev->cdsCount; ++i) { if (i>0) printf(","); printf("%d", ev->cdsStarts[i]%3 + 1); } webPrintLinkCellEnd(); webPrintLinkTableNewRow(); } sqlFreeResult(&sr); webPrintLinkTableEnd(); printf("This table shows CDS predictions for this transcript from a number of " "sources including alignments against UniProtKB proteins, alignments against Genbank " "mRNAs with CDS regions annotated by the sequence submitter, and " "Victor Solovyev's bestorf program. Each prediction is assigned an ad-hoc score " "score is based on several factors including the quality of " "any associated alignments, the quality of the source, and the length of the " "prediction. For RefSeq transcripts with annotated CDSs the ad-hoc score " "is over a million unless there are severe problems mapping the mRNA to the " "genome. In other cases the score generally ranges from 0 to 50,000. " "The highest scoring prediction in this table is used to define the CDS " "boundaries for this transcript.<P>If no score is 2000 or more, the transcript " "is considered non-coding. In cases where the CDS is subject to " "nonsense-mediated decay the CDS is removed. The CDS is also removed " "from transcripts when evidence points to it being in an artifact of an " "incompletely processed transcript. Specifically if the CDS is entirely " "enclosed in the 3' UTR or an intron of a refSeq or other high quality " "transcript, the CDS is removed."); } else { printf("no significant CDS prediction found, likely %s is noncoding", geneName); } } hFreeConn(&conn); }
void showTxInfo(char *geneName, struct trackDb *tdb, char *txInfoTable) /* Print out stuff from txInfo table. */ { struct sqlConnection *conn = hAllocConn(database); if (sqlTableExists(conn, txInfoTable)) { char query[512]; sqlSafef(query, sizeof(query), "select * from %s where name='%s'", txInfoTable, geneName); struct sqlResult *sr = sqlGetResult(conn, query); char **row; if ((row = sqlNextRow(sr)) != NULL) { struct txInfo *info = txInfoLoad(row); webNewSection("Transcript Information"); webPrintLinkTableStart(); webPrintLinkCell("<B>category:</B>"); webPrintLinkCell(info->category); webPrintLinkCell("<B>nonsense-mediated-decay:</B>"); webPrintLinkCell(info->nonsenseMediatedDecay ? "yes" : "no"); webPrintLinkCell("<B>RNA accession:</B>"); webPrintLinkCell(info->sourceAcc); webPrintLinkTableNewRow(); webPrintLinkCell("<B>exon count:</B>"); webPrintIntCell(info->exonCount); webPrintLinkCell("<B>CDS single in 3' UTR:</B>"); webPrintLinkCell(info->cdsSingleInUtr3 ? "yes" : "no"); webPrintLinkCell("<B>RNA size:</B>"); webPrintIntCell(info->sourceSize); webPrintLinkTableNewRow(); webPrintLinkCell("<B>ORF size:</B>"); webPrintIntCell(info->orfSize); webPrintLinkCell("<B>CDS single in intron:</B>"); webPrintLinkCell(info->cdsSingleInIntron ? "yes" : "no"); webPrintLinkCell("<B>Alignment % ID:</B>"); webPrintDoubleCell(info->aliIdRatio*100); webPrintLinkTableNewRow(); webPrintLinkCell("<B>txCdsPredict score:</B>"); webPrintDoubleCell(info->cdsScore); webPrintLinkCell("<B>frame shift in genome:</B>"); webPrintLinkCell(info->genomicFrameShift ? "yes" : "no"); webPrintLinkCell("<B>% Coverage:</B>"); webPrintDoubleCell(info->aliCoverage*100); webPrintLinkTableNewRow(); webPrintLinkCell("<B>has start codon:</B>"); webPrintLinkCell(info->startComplete ? "yes" : "no"); webPrintLinkCell("<B>stop codon in genome:</B>"); webPrintLinkCell(info->genomicStop ? "yes" : "no"); webPrintLinkCell("<B># of Alignments:</B>"); webPrintIntCell(info->genoMapCount); webPrintLinkTableNewRow(); webPrintLinkCell("<B>has end codon:</B>"); webPrintLinkCell(info->endComplete ? "yes" : "no"); webPrintLinkCell("<B>retained intron:</B>"); webPrintLinkCell(info->retainedIntron ? "yes" : "no"); webPrintLinkCell("<B># AT/AC introns</B>"); webPrintIntCell(info->atacIntrons); webPrintLinkTableNewRow(); webPrintLinkCell("<B>selenocysteine:</B>"); webPrintLinkCell(info->selenocysteine ? "yes" : "no"); webPrintLinkCell("<B>end bleed into intron:</B>"); webPrintIntCell(info->bleedIntoIntron); webPrintLinkCell("<B># strange splices:</B>"); webPrintIntCell(info->strangeSplice); webPrintLinkTableEnd(); txInfoFree(&info); } sqlFreeResult(&sr); } hFreeConn(&conn); printf("%s", "The table above summarizes many aspects of this transcripts. Here is a more \n" "detailed description of each of the fields than can fit in the label. Also\n" "see the CDS Prediction Information table below for additional information\n" "relevant to the predicted protein product if any.\n" "\n" "<UL>\n" "<LI><B>category</B> - This is either <i>coding</i>, <i>noncoding</i>,\n" "<i>antisense</i> or \n" "<i>nearCoding</i>. A coding transcript is one where the evidence is relatively\n" "good that it produces a protein. The nearCoding transcripts overlap coding\n" "transcripts by at least 20 bases on the same strand, but themselves do not seem to produce \n" "protein products. In many cases this is because they are splicing varients\n" "with introns after the stop codon, that therefore undergo nonsense mediated decay. \n" "Antisense transcripts overlap coding transcripts by at least 20 bases on the oppposite\n" "strand.\n" "The other transcripts, which are neither coding, nor overlapping coding,\n" "are categorized as noncoding.</LI>\n" "<LI><B>exon count</B> - The number of exons in the gene. Single exon genes are\n" "generally somewhat less reliable than multi-exon genes, though there are\n" "many well-known genuine single exon genes such as the Histones and the Sox \n" "family.</LI>\n" "<LI><B>ORF size</B> - The size of the open reading frame in the mRNA. Divide by\n" "three to get the size of the protein.</LI>\n" "<LI><B>txCdsPredict score</B> - The score from the txCdsPredict program. This\n" "program weighs a variety of evidence including the presence of a Kozak consensus\n" "sequence at the start codon, the length of the ORF, the presense of upstream\n" "ORFs, homology in other species, and nonsense mediated decay. In general\n" "a score over 1000 is almost certain to be a protein, and scores over 800 have\n" "about a 90% chance of being a protein.</LI>\n" "<LI><B>has start codon</B> - Indicates if the initial codon is an ATG.</LI>\n" "<LI><B>has end codon</B> - Indicates if the last codon is TAA, TAG, or TGA.</LI>\n" "<LI><B>selenocysteine</B> - Indicates if this is one of the special proteins where\n" "TGA encodes the animo acid selenocysteine rather than encoding a stop codon.</LI>\n" "<LI><B>nonsense-mediated-decay</B> - Indicates whether the final intron is more than\n" "55 bases after the stop codon. If true, then generally the mRNA will be degraded\n" "before it can produce a detectable amount of protein. Therefore when this condition\n" "is true we remove the predicted coding region from the transcript.</LI>\n" "<LI><B>CDS single in 3' UTR</B> - This is a strong indicator that the coding region\n" "(CDS) is a coincedental open reading frame rather than a true indication \n" "that the transcript codes for protein. This indicates that the coding sequence \n" "resides in a single exon, and that this exon is located entirely in the 3' UTR \n" "of another transcript that codes for a different protein not overlapping the \n" "ORF in the same frame. We remove the CDS from non-refSeq transcripts that meet\n" "this condition, which often results from a retained intron or from missing the\n" "initial parts of a transcript.</LI>\n" "<LI><B>CDS single in intron</B> - This is another strong indicator that the ORF is\n" "not real. Here the coding region (CDS) lies entirely in the intron of another\n" "transcript which has strong evidence of coding for a protein. We remove the CDS\n" "from non-refSeq transcripts that meet this condition, which generally results\n" "from a retained intron.</LI>\n" "<LI><B>frame shift in genome</B> - This only occurs for RefSeq transcripts. Here\n" "a frame shift is detected in the coding region when aligning the transcript against\n" "the genome. Since RefSeq does examine these cases carefully, it is strong evidence\n" "that the genome sequence is in error, or that the anonymous DNA doner carried\n" "a frame-shift mutation in this gene. In general there will be multiple independent\n" "cDNA clones supporting the RefSeq over the genome. In the gene display on the\n" "browser, one or two bases will be removed from the gene to keep frame intact.</LI>\n" "<LI><B>stop codon in genome</B> - This also only occurs for RefSeq transcripts, and\n" "as with the frame shifts, there is generally multiple lines of evidence suggesting\n" "sequencing error or mutation in the reference genome. In the gene display on the\n" "browser three bases will be removed from the gene to avoid the stop.</LI>\n" "<LI><B>retained intron</B> - The transcript contains what is an intron in \n" "an overlapping transcript on the same strand. In many cases this indicates\n" "that the transcript was not completely processed. Unless specific steps are\n" "taken to isolate cytoplasmic rather than nuclear RNA, a certain fraction of the\n" "RNA isolated for sequencing will be incompletely processed. Transcripts with\n" "retained introns should be viewed suspiciously, especially if they are long.\n" "However there are cases where fully mature mRNA transcripts are made with \n" "and without a particular intron, so transcripts with retained introns are not\n" "eliminated from this gene set.</LI>\n" "<LI><B>end bleed into intron</B> - Very often when an intron is retained, it is so\n" "long that the next exon is not reached and sequenced. In this case the retained\n" "intron can't be detected directly. However high values of \"end bleed\" are\n" "strongly suggestive of a retained intron. End bleed measures how far the end of a transcript extends into an intron defined by another overlapping transcript. Note\n" "however that alternative promoters and alternative polyadenylation sites can\n" "create end bleeds in fully mature transcripts.</LI>\n" "<LI><B>RNA accession</B> - The RefSeq or Genbank/EMBL/DJJ accession on which\n" "this transcript is most closely based. Note that the splice sites when possible\n" "are taken from a consensus of RNA alignments rather than just from a single RNA.\n" "For non-RefSeq genes the bases are taken from the genome rather than the RNA.\n" "However the transcript does define which introns and exons are used to build\n" "the transcript.</LI>\n" "<LI><B>RNA size</B> - The size of the RNA on which this transcript is most\n" "closely based, including the poly-A tail.</LI>\n" "<LI><B>Alignment % ID</B> - Percentage identity within of alignment of RNA\n" "to genome.</LI>\n" "<LI><B>% Coverage</B> - The percentage of the RNA covered by the alignment to\n" "genome. This excludes the poly-A tail.</LI>\n" "<LI><B># of Alignments:</B> - The number of times this RNA aligns to the genome\n" "at very high stringency. More care must be taken in interpreting genes based\n" "on transcripts with multiple alignments. We do substantial filtering to avoid\n" "pseudogenes, but extremely recent, extremely complete pseudogenes may still\n" "pass these filters and cause multiple alignments.</LI>\n" "<LI><B># of AT/AC introns</B> - The number of introns in this transcript with\n" "AT/AC rather than the usual GT/AG ends. There are roughly 300 genes with\n" "legitimate AT/AC introns.</LI>\n" "<LI><B># of strange splices</B> - The number of introns that have ends which are\n" "neither GT/AG, GC/AG, nor AT/AC. Many of these are the result of sequencing\n" "errors, or polymorphisms between the DNA donors and the RNA donors.</LI>\n" "</UL>\n"); }