static void reactomeLink(struct pathwayLink *pl, struct sqlConnection *conn, char *geneId) { char condStr[255]; char *spID, *chp; struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; char *eventDesc; char *eventID; /* check the existence of kgXref table first */ if (isRgdGene(conn)) { if (!sqlTableExists(conn, "rgdGene2Xref")) return; } else { if (!sqlTableExists(conn, "kgXref")) return; } if (isRgdGene(conn)) { sqlSafefFrag(condStr, sizeof(condStr), "name='%s'", geneId); spID = sqlGetField(database, "rgdGene2ToUniProt", "value", condStr); } else { sqlSafefFrag(condStr, sizeof(condStr), "kgID='%s'", geneId); spID = sqlGetField(database, "kgXref", "spID", condStr); } if (spID != NULL) { /* convert splice variant UniProt ID to its main root ID */ chp = strstr(spID, "-"); if (chp != NULL) *chp = '\0'; hPrintf( "<BR>Protein %s (<A href=\"http://www.reactome.org/cgi-bin/link?SOURCE=UniProt&ID=%s\" TARGET=_blank>Reactome details)</A> participates in the following event(s):<BR><BR>" , spID, spID); conn2= hAllocConn(database); sqlSafef(query2,sizeof(query2), "select eventID, eventDesc from proteome.spReactomeEvent where spID='%s'", spID); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { eventID = row2[0]; eventDesc = row2[1]; hPrintf( "<A href=\"http://www.reactome.org/cgi-bin/eventbrowser?DB=gk_current&ID=%s\" TARGET=_blank>%s</A> %s<BR>\n", eventID, eventID, eventDesc); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); hFreeConn(&conn2); } }
int main(int argc, char *argv[]) { struct sqlConnection *conn, *conn2; char query2[256]; struct sqlResult *sr2; char **row2; char cond_str[256]; char *kgID; char *proteinID; char *seq; char *acc; char protDbName[100]; char spDbName[100]; char *dbName; char *ro_dbName; FILE *o1, *o2; struct dnaSeq *kgSeq; if (argc != 4) usage(); o1 = fopen("j.dat", "w"); o2 = fopen("jj.dat", "w"); dbName = argv[1]; ro_dbName = argv[3]; sprintf(protDbName, "proteins%s", argv[2]); sprintf(spDbName, "sp%s", argv[2]); conn= hAllocConn(ro_dbName); conn2= hAllocConn(ro_dbName); sqlSafef(query2, sizeof query2, "select name from %s.knownGene;", dbName); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { kgID = row2[0]; sqlSafefFrag(cond_str, sizeof cond_str, "name = '%s';", kgID); seq = sqlGetField(dbName, "knownGenePep", "seq", cond_str); if (seq != NULL) { fprintf(o1, "%s\t%s\n", kgID, seq);fflush(o1); } else { sqlSafefFrag(cond_str, sizeof cond_str, "name = '%s';", kgID); proteinID=sqlGetField(dbName, "knownGene", "proteinID", cond_str); if (proteinID != NULL) { sqlSafefFrag(cond_str, sizeof cond_str, "val = '%s';", proteinID); acc = sqlGetField(spDbName, "displayId", "acc", cond_str); if (acc == NULL) { fprintf(stderr, "NO acc.displayId.%s: %s from name.knownGene.%s: %s\n", spDbName, proteinID, dbName, kgID); fflush(stderr); } else { sqlSafefFrag(cond_str, sizeof cond_str, "acc = '%s';", acc); seq = sqlGetField(spDbName, "protein", "val", cond_str); if (seq == NULL) { fprintf(stderr, "NO protein seq for %s\n", kgID); fprintf(stderr, "proteinID.knownGene.%s: %s, acc.displayID.%s: %s\n", dbName, proteinID, spDbName, acc); fflush(stderr); } else { fprintf(o1, "%s\t%s\n", kgID, seq); } } } else { fprintf(stderr, "kgID: %s not in knownGenePep or knownGene\n", kgID); } } sqlSafefFrag(cond_str, sizeof cond_str, "name = '%s';", kgID); seq = sqlGetField(dbName, "knownGeneMrna", "seq", cond_str); if (seq != NULL) { fprintf(o2, "%s\t%s\n", kgID, seq);fflush(o1); 
} else { kgSeq = hGenBankGetMrna(dbName, kgID, NULL); if (kgSeq != NULL) { fprintf(o2, "%s\t%s\n", kgID, kgSeq->dna);fflush(o1); } else { fprintf(stderr, "NO mRNA seq for %s\n", kgID);fflush(stderr); } } row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); hFreeConn(&conn); hFreeConn(&conn2); fclose(o1); fclose(o2); mustSystem("cat j.dat |sort|uniq > knownGenePep.tab"); mustSystem("cat jj.dat|sort|uniq > knownGeneMrna.tab"); mustSystem("rm j.dat"); mustSystem("rm jj.dat"); return(0); }
/* Parse one "GN" (gene name) line and write one "<kgID>\t<alias>" row to out
 * for every name on it.
 *
 * Names start at column 5 and are separated by " OR " or " AND ".  A leading
 * '(' is dropped from a name and the name is cut at the first ')'.  The line
 * is edited in place, so no copies are allocated.  Nothing is written when
 * kgID is NULL. */
static void writeGeneNameAliases(FILE *out, const char *kgID, char *line)
{
    size_t len;
    char *name;

    if (kgID == NULL)
        return;

    /* Drop the line terminator (LF or CRLF, if any) and then one trailing
     * '.'.  Checking the actual characters keeps a final line without a
     * newline from losing its last letter. */
    len = strlen(line);
    while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r'))
        line[--len] = '\0';
    if (len > 0 && line[len - 1] == '.')
        line[--len] = '\0';

    /* Never step past the terminator on a line shorter than 5 characters. */
    name = (len >= 5) ? line + 5 : line + len;

    while (name != NULL) {
        char *orSep;
        char *andSep;
        char *sep;
        char *alias;
        char *closeParen;

        while (*name == ' ')
            name++;

        /* Pick whichever separator comes first, if there is one. */
        orSep = strstr(name, " OR ");
        andSep = strstr(name, " AND ");
        if (orSep != NULL && andSep != NULL)
            sep = (orSep < andSep) ? orSep : andSep;
        else
            sep = (orSep != NULL) ? orSep : andSep;

        alias = name;
        if (sep == NULL) {
            name = NULL;
        } else {
            *sep = '\0';
            /* 4 skips " OR " exactly; for " AND " it lands on the trailing
             * space, which the space-skipping loop above consumes. */
            name = sep + 4;
        }

        /* clean up "(XXXX" or "XXXX)" */
        if (*alias == '(')
            alias++;
        closeParen = strchr(alias, ')');
        if (closeParen != NULL)
            *closeParen = '\0';

        fprintf(out, "%s\t%s\n", kgID, alias);
    }
}

/* Read a file of protein records and write gene-name aliases for the
 * proteins that have an entry in knownGene.
 *
 *   argv[1] - database containing the knownGene table
 *   argv[2] - input file of records; each record starts with an "ID " line
 *             and ends with a "//" line
 *   argv[3] - output file receiving "<kgID>\t<alias>" rows
 *
 * For each record the ID is looked up as knownGene.proteinID; when a gene is
 * found, every name on the record's GN lines is written as an alias of it.
 * Exits with status 1 when a record does not start with an ID line. */
int main(int argc, char *argv[])
{
    struct sqlConnection *conn;
    FILE *inf;
    FILE *o1;
    char cond_str[256];
    char line[2000];
    char *database;
    char *proteinFileName;
    char *outputFileName;
    char *kgID;

    if (argc != 4) usage();

    database = argv[1];
    proteinFileName = argv[2];
    outputFileName = argv[3];

    /* NOTE(review): conn is opened and released but no query below uses it;
     * kept in case the allocation itself is relied upon. */
    conn = hAllocConn(database);

    o1 = mustOpen(outputFileName, "w");
    if ((inf = mustOpen(proteinFileName, "r")) == NULL) {
        fprintf(stderr, "Can't open file %s.\n", proteinFileName);
        exit(8);
    }

    while (fgets(line, sizeof line, inf) != NULL) {
        char *id;

        if (strncmp(line, "ID ", 3) != 0) {
            fprintf(stderr, "expected ID line, but got: %s\n", line);
            exit(1);
        }

        /* The ID is the first token after "ID"; accept any number of
         * separating spaces and end it at the next space or line end. */
        id = line + 3;
        while (*id == ' ')
            id++;
        id[strcspn(id, " \r\n")] = '\0';

        sqlSafefFrag(cond_str, sizeof cond_str, "proteinID = '%s'", id);
        /* NOTE(review): presumably heap-allocated by sqlGetField; it is not
         * released here - confirm the library's ownership rules. */
        kgID = sqlGetField(database, "knownGene", "name", cond_str);

        /* Consume the rest of the record, up to "//" or end of file. */
        while (fgets(line, sizeof line, inf) != NULL) {
            /* "//" signal end of a record */
            if ((line[0] == '/') && (line[1] == '/'))
                break;

            /* work on GN (Gene Name) lines only */
            if (strncmp(line, "GN ", 3) == 0)
                writeGeneNameAliases(o1, kgID, line);
        }
    }

    fclose(inf);
    fclose(o1);
    hFreeConn(&conn);
    return(0);
}