static boolean localizationExists(struct section *section, struct sqlConnection *conn, char *geneId) /* Return TRUE if localization and existance tables exist and have something * on this one. */ { char query[256]; /* mitopred - prediction of nuclear-encoded mitochondrial proteins */ if (swissProtAcc != NULL && sqlTableExists(conn, "mitopred")) { sqlSafef(query, sizeof(query), "select count(*) from mitopred where name = '%s' or name = '%s'", swissProtAcc, spAnyAccToId(spConn, swissProtAcc)); if (sqlQuickNum(conn, query) > 0) return TRUE; } /* SGD (Sacchromyces Genome Database) localization & abundance data */ if (sqlTablesExist(conn, "sgdLocalization sgdAbundance")) { sqlSafef(query, sizeof(query), "select count(*) from sgdLocalization where name = '%s'", geneId); if (sqlQuickNum(conn, query) > 0) return TRUE; sqlSafef(query, sizeof(query), "select count(*) from sgdAbundance where name = '%s'", geneId); if (sqlQuickNum(conn, query) > 0) return TRUE; } return FALSE; }
static void synonymPrint(struct section *section, struct sqlConnection *conn, char *id) /* Print out SwissProt comments - looking up typeId/commentVal. */ { char *protAcc = getSwissProtAcc(conn, spConn, id); char *spDisplayId; char *refSeqAcc = ""; char *mrnaAcc = ""; char *oldDisplayId; char condStr[255]; char *kgProteinID; char *parAcc; /* parent accession of a variant splice protein */ char *chp; if (isRgdGene(conn)) { rgdGene2SynonymPrint(section,conn, id); return; } if (sqlTablesExist(conn, "kgAlias")) printAlias(id, conn); if (sameWord(genome, "Zebrafish")) { char *xrefTable = "ensXRefZfish"; char *geneIdCol = "ensGeneId"; /* get Gene Symbol and RefSeq accession from Zebrafish-specific */ /* cross-reference table */ printGeneSymbol(id, xrefTable, geneIdCol, conn); refSeqAcc = getRefSeqAcc(id, xrefTable, geneIdCol, conn); hPrintf("<B>ENSEMBL ID:</B> %s", id); } else { char query[256]; char *toRefTable = genomeOptionalSetting("knownToRef"); if (toRefTable != NULL && sqlTableExists(conn, toRefTable)) { safef(query, sizeof(query), "select value from %s where name='%s'", toRefTable, id); refSeqAcc = emptyForNull(sqlQuickString(conn, query)); } if (sqlTableExists(conn, "kgXref")) { safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id); mrnaAcc = emptyForNull(sqlQuickString(conn, query)); } if (sameWord(genome, "C. elegans")) hPrintf("<B>WormBase ID:</B> %s<BR>", id); else hPrintf("<B>UCSC ID:</B> %s<BR>", id); } if (refSeqAcc[0] != 0) { hPrintf("<B>RefSeq Accession: </B> <A HREF=\""); printOurRefseqUrl(stdout, refSeqAcc); hPrintf("\">%s</A><BR>\n", refSeqAcc); } else if (mrnaAcc[0] != 0) { safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc); if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL) { hPrintf("<B>Representative RNA: </B> <A HREF=\""); printOurMrnaUrl(stdout, mrnaAcc); hPrintf("\">%s</A><BR>\n", mrnaAcc); } else /* do not show URL link if it is not found in gbCdnaInfo */ { hPrintf("<B>Representative RNA: %s </B>", mrnaAcc); } } if (protAcc != NULL) { kgProteinID = cloneString(""); if (hTableExists(sqlGetDatabase(conn), "knownGene") && (isNotEmpty(cartOptionalString(cart, hggChrom)) && differentWord(cartOptionalString(cart, hggChrom),"none"))) { safef(condStr, sizeof(condStr), "name = '%s' and chrom = '%s' and txStart=%s and txEnd=%s", id, cartOptionalString(cart, hggChrom), cartOptionalString(cart, hggStart), cartOptionalString(cart, hggEnd)); kgProteinID = sqlGetField(database, "knownGene", "proteinID", condStr); } hPrintf("<B>Protein: "); if (strstr(kgProteinID, "-") != NULL) { parAcc = cloneString(kgProteinID); chp = strstr(parAcc, "-"); *chp = '\0'; /* show variant splice protein and the UniProt link here */ hPrintf("<A HREF=\"http://www.uniprot.org/uniprot%s\" " "TARGET=_blank>%s</A></B>, splice isoform of ", kgProteinID, kgProteinID); hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A></B>\n", parAcc, parAcc); } else { hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A></B>\n", protAcc, protAcc); } /* show SWISS-PROT display ID if it is different than the accession ID */ /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */ spDisplayId = spAnyAccToId(spConn, protAcc); if (spDisplayId == NULL) { errAbort("<br>%s seems to no longer be a valid protein ID in our latest UniProtKB DB.", protAcc); } if (strstr(spDisplayId, protAcc) == NULL) { hPrintf(" (aka %s", spDisplayId); /* show once if the new and old displayId are the same */ oldDisplayId = oldSpDisplayId(spDisplayId); if (oldDisplayId != NULL) { if (!sameWord(spDisplayId, oldDisplayId) && !sameWord(protAcc, oldDisplayId)) { hPrintf(" or %s", oldDisplayId); } } hPrintf(")<BR>\n"); } } printCcds(id, conn); }
static void rgdGene2SynonymPrint(struct section *section, struct sqlConnection *conn, char *rgdGeneId) { char *geneSym = NULL, *geneName = NULL; char query[256], **row; struct sqlResult *sr; if (rgdGeneId != NULL) { safef(query, sizeof(query), "select old_symbol, old_name from genes_rat where gene_rgd_id = '%s'", rgdGeneId+4L); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { if (row[0][0] != 0 && !sameString(row[0], "n/a")) { geneSym = cloneString(row[0]); hPrintf("<B>Symbol:</B> %s ", addComma(row[0])); //hPrintf("<BR>\n"); } if (row[1][0] != 0 && !sameString(row[0], "n/a")) { geneName = cloneString(row[1]); hPrintf("<BR><B>Name:</B> %s ", addComma(geneName)); hPrintf("<BR>\n"); } } sqlFreeResult(&sr); safef(query, sizeof(query), "select value from rgdGene2ToRefSeq where name= '%s'", rgdGeneId); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { hPrintf("<B>RefSeq Accession: </B> <A HREF=\""); printOurRefseqUrl(stdout, row[0]); hPrintf("\">%s</A><BR>\n", row[0]); } sqlFreeResult(&sr); safef(query, sizeof(query), "select value from rgdGene2ToUniProt where name= '%s'", rgdGeneId); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { char *spId, *spDisplayId, *oldDisplayId; spId = row[0]; hPrintf("<B>Protein: </B>"); hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A>\n", spId, spId); /* show SWISS-PROT display ID if it is different than the accession ID */ /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */ spDisplayId = spAnyAccToId(spConn, spId); if (spDisplayId == NULL) { errAbort("<br>The corresponding protein %s of this gene is not found in our current UniProtKB DB.", spId); } if (strstr(spDisplayId, spId) == NULL) { hPrintf(" (aka %s", spDisplayId); /* show once if the new and old displayId are the same */ oldDisplayId = oldSpDisplayId(spDisplayId); if (oldDisplayId != NULL) { if (!sameWord(spDisplayId, oldDisplayId) && !sameWord(spId, oldDisplayId)) { hPrintf(" or %s", oldDisplayId); } } hPrintf(")<BR>\n"); } } sqlFreeResult(&sr); } }
void txGeneXref(char *genomeDb, char *uniProtDb, char *genePredFile, char *infoFile, char *pickFile, char *evFile, char *outFile) /* txGeneXref - Make kgXref type table for genes.. */ { /* Load picks into hash. We don't use cdsPicksLoadAll because empty fields * cause that autoSql-generated routine problems. */ struct hash *pickHash = newHash(18); struct hash *geneToProtHash = makeGeneToProtHash(genePredFile); struct cdsPick *pick; struct lineFile *lf = lineFileOpen(pickFile, TRUE); char *row[CDSPICK_NUM_COLS]; while (lineFileRowTab(lf, row)) { pick = cdsPickLoad(row); removePickVersions(pick); hashAdd(pickHash, pick->name, pick); } /* Load evidence into hash */ struct hash *evHash = newHash(18); struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile); for (ev = evList; ev != NULL; ev = ev->next) hashAdd(evHash, ev->name, ev); /* Open connections to our databases */ struct sqlConnection *gConn = sqlConnect(genomeDb); struct sqlConnection *uConn = sqlConnect(uniProtDb); /* Read in info file, and loop through it to make out file. */ struct txInfo *info, *infoList = txInfoLoadAll(infoFile); FILE *f = mustOpen(outFile, "w"); for (info = infoList; info != NULL; info = info->next) { char *kgID = info->name; char *mRNA = ""; char *spID = ""; char *spDisplayID = ""; char *geneSymbol = NULL; char *refseq = ""; char *protAcc = ""; char *description = NULL; char query[256]; char *proteinId = hashMustFindVal(geneToProtHash, info->name); boolean isAb = sameString(info->category, "antibodyParts"); pick = hashFindVal(pickHash, info->name); ev = hashFindVal(evHash, info->name); if (pick != NULL) { /* Fill in the relatively straightforward fields. */ refseq = pick->refSeq; if (info->orfSize > 0) { protAcc = pick->refProt; spID = proteinId; if (sameString(protAcc, spID)) spID = pick->uniProt; if (spID[0] != 0) spDisplayID = spAnyAccToId(uConn, spID); } /* Fill in gene symbol and description from refseq if possible. */ if (refseq[0] != 0) { struct sqlResult *sr; safef(query, sizeof(query), "select name,product from refLink where mrnaAcc='%s'", refseq); sr = sqlGetResult(gConn, query); char **row = sqlNextRow(sr); if (row != NULL) { geneSymbol = cloneString(row[0]); if (!sameWord("unknown protein", row[1])) description = cloneString(row[1]); } sqlFreeResult(&sr); } /* If need be try uniProt for gene symbol and description. */ if (spID[0] != 0 && (geneSymbol == NULL || description == NULL)) { char *acc = spLookupPrimaryAcc(uConn, spID); if (description == NULL) description = spDescription(uConn, acc); if (geneSymbol == NULL) { struct slName *nameList = spGenes(uConn, acc); if (nameList != NULL) geneSymbol = cloneString(nameList->name); slFreeList(&nameList); } } } /* If it's an antibody fragment use that as name. */ if (isAb) { geneSymbol = cloneString("abParts"); description = cloneString("Parts of antibodies, mostly variable regions."); isAb = TRUE; } if (ev == NULL) { mRNA = cloneString(""); if (!isAb) { errAbort("%s is %s but not %s\n", info->name, infoFile, evFile); } } else { mRNA = cloneString(ev->primary); chopSuffix(mRNA); } /* Still no joy? Try genbank RNA records. */ if (geneSymbol == NULL || description == NULL) { if (ev != NULL) { int i; for (i=0; i<ev->accCount; ++i) { char *acc = ev->accs[i]; chopSuffix(acc); if (geneSymbol == NULL) { safef(query, sizeof(query), "select geneName.name from gbCdnaInfo,geneName " "where geneName.id=gbCdnaInfo.geneName and gbCdnaInfo.acc = '%s'", acc); geneSymbol = sqlQuickString(gConn, query); if (geneSymbol != NULL) { if (sameString(geneSymbol, "n/a")) geneSymbol = NULL; } } if (description == NULL) { safef(query, sizeof(query), "select description.name from gbCdnaInfo,description " "where description.id=gbCdnaInfo.description " "and gbCdnaInfo.acc = '%s'", acc); description = sqlQuickString(gConn, query); if (description != NULL) { if (sameString(description, "n/a")) description = NULL; } } } } } if (geneSymbol == NULL) geneSymbol = mRNA; if (description == NULL) description = mRNA; /* Get rid of some characters that will cause havoc downstream. */ stripChar(geneSymbol, '\''); subChar(geneSymbol, '<', '['); subChar(geneSymbol, '>', ']'); /* Abbreviate geneSymbol if too long */ if (strlen(geneSymbol) > 40) strcpy(geneSymbol+37, "..."); fprintf(f, "%s\t", kgID); fprintf(f, "%s\t", mRNA); fprintf(f, "%s\t", spID); fprintf(f, "%s\t", spDisplayID); fprintf(f, "%s\t", geneSymbol); fprintf(f, "%s\t", refseq); fprintf(f, "%s\t", protAcc); fprintf(f, "%s\n", description); } carefulClose(&f); }
static void localizationPrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print out localization and abundance links. */ { char query[256], **row, *s = NULL; struct sqlResult *sr; boolean firstTime = TRUE; /* mitopred - prediction of nuclear-encoded mitochondrial proteins */ if (swissProtAcc != NULL && sqlTableExists(conn, "mitopred")) { sqlSafef(query, sizeof(query), "select confidence from mitopred where name = '%s' or name = '%s'", swissProtAcc, spAnyAccToId(spConn, swissProtAcc)); sr = sqlGetResult(conn, query); firstTime = TRUE; while ((row = sqlNextRow(sr)) != NULL) { if (firstTime) { hPrintf("<B>Mitopred:</B> mitochondrion, confidence level: "); firstTime = FALSE; } else { hPrintf(", "); } hPrintf("%s", row[0]); } sqlFreeResult(&sr); if (!firstTime) { hPrintf("<BR>"); hPrintf("Prediction of nuclear-encoded mitochondrial proteins from " "Guda et al., Bioinformatics. 2004 Jul 22;20(11):1785-94.<BR>" "For more information see " "<A HREF=\"http://mitopred.sdsc.edu/\" TARGET=_blank>" "http://mitopred.sdsc.edu/</A>.<P>"); } } /* SGD (Sacchromyces Genome Database) localization & abundance data */ if (sqlTablesExist(conn, "sgdLocalization sgdAbundance")) { sqlSafef(query, sizeof(query), "select value from sgdLocalization where name = '%s'", geneId); sr = sqlGetResult(conn, query); firstTime = TRUE; while ((row = sqlNextRow(sr)) != NULL) { if (firstTime) { hPrintf("<B>SGD Localization:</B> "); firstTime = FALSE; } else { hPrintf(", "); } hPrintf("%s", row[0]); } sqlFreeResult(&sr); if (!firstTime) { hPrintf("<BR>"); } sqlSafef(query, sizeof(query), "select abundance from sgdAbundance where name = '%s'", geneId); s = sqlQuickString(conn, query); if (s != NULL) { hPrintf("<B>SGD Abundance:</B> %s (range from 41 to 1590000)<BR>\n", s); freez(&s); } hPrintf("Protein localization data from " "Huh et al. (2003), Nature 425:686-691<BR>" "Protein abundance data from " "Ghaemmaghami et al. (2003) Nature 425:737-741<BR>" "For more information see " "<A HREF=\"http://yeastgfp.yeastgenome.org\" TARGET=_blank>" "http://yeastgfp.yeastgenome.org</A>."); } }