void hgGetNrOmimGene(char *database, char *outFileName)
/* hgGetNrOmimGene - Generate omimGene entries related to NR_xxxx RefSeq.
 *
 * database    - database the connections are allocated for and whose omimGene
 *               table is probed.
 * outFileName - file that receives one tab-separated line
 *               (chrom, txStart, txEnd, omimId) per new entry.
 *
 * If the output file cannot be opened a message is written to stderr and the
 * function returns without querying anything. */
{
    struct sqlConnection *conn2, *conn3;
    char query2[256], query3[256];
    struct sqlResult *sr2, *sr3;
    char **row2, **row3;
    FILE *outf;
    char *chrom, *txStart, *txEnd;
    char *omimId;

    outf = fopen(outFileName, "w");
    if (outf == NULL) {
        fprintf(stderr, "hgGetNrOmimGene: can't open %s for writing\n", outFileName);
        return;
    }

    conn2 = hAllocConn(database);
    conn3 = hAllocConn(database);

    /* first get all RefSeq entries that begin with "NR_" and have related OMIM entries */
    /* NOTE(review): omimGene o appears in the FROM list without a join condition - confirm intended. */
    snprintf(query2, sizeof(query2),
             "select g.chrom, g.txStart, g.txEnd, omimId from refGene g, refLink l, omimGene o where l.mrnaAcc=g.name and g.name like 'NR_%c' and omimId <>0 limit 1000",
             '%');
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    while (row2 != NULL) {
        chrom   = row2[0];
        txStart = row2[1];
        txEnd   = row2[2];
        omimId  = row2[3];

        /* then check if this omimId is already in the omimGene table;
         * snprintf bounds the write so a long database name cannot overrun query3 */
        snprintf(query3, sizeof(query3),
                 "select name from %s.omimGene where name='%s'", database, omimId);
        sr3 = sqlMustGetResult(conn3, query3);
        row3 = sqlNextRow(sr3);

        /* if not, create a new omimGene entry */
        if (row3 == NULL) {
            fprintf(outf, "%s\t%s\t%s\t%s\n", chrom, txStart, txEnd, omimId);
        }
        sqlFreeResult(&sr3);
        row2 = sqlNextRow(sr2);
    }
    sqlFreeResult(&sr2);

    hFreeConn(&conn2);
    hFreeConn(&conn3);
    fclose(outf);
}
int searchProteinsInSwissProtByGene(char *queryGeneID) /* search Swiss-Prot database to see if it contains the protein Input: queryGeneID return: number of proteins found in Swiss-Prot */ { int proteinCnt; struct sqlConnection *conn; char query[256]; struct sqlResult *sr; char **row; conn = sqlConnect(UNIPROT_DB_NAME); sqlSafef(query, sizeof(query), "select count(*) from gene, displayId, accToTaxon,taxon " "where gene.val='%s' and gene.acc=displayId.acc and accToTaxon.taxon=taxon.id " "and accToTaxon.acc=gene.acc order by taxon.id", queryGeneID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row == NULL) { errAbort("Error occured during mySQL query: %s\n", query); } proteinCnt = atoi(row[0]); sqlFreeResult(&sr); sqlDisconnect(&conn); return(proteinCnt); }
int main(int argc, char *argv[]) { char *database; char *outFn; struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; int iCnt; char *chp1, *chp2; if (argc != 3) usage(); database = argv[1]; conn2= hAllocConn(database); outFn = argv[2]; outf = mustOpen(outFn, "w"); sprintf(query2,"select gene_rgd_id, uniprot_id from genes_rat where uniprot_id <> \"\""); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { boolean oneGeneDone; rgdId = row2[0]; iCnt = 0; chp1 = row2[1]; oneGeneDone = FALSE; while (!oneGeneDone) { iCnt++; chp2 = strstr(chp1,","); if (chp2 != NULL) { *chp2 = '\0'; fprintf(outf, "RGD:%s\t%s\n", rgdId, chp1); chp2++; chp1 = chp2; } else { fprintf(outf, "RGD:%s\t%s\n", rgdId, chp1); oneGeneDone = TRUE; } } row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); fclose(outf); hFreeConn(&conn2); return(0); }
void webMain(struct sqlConnection *conn) /* Set up fancy web page with hotlinks bar and * sections. */ { struct section *sectionList = NULL; char query[256]; struct sqlResult *sr; char **row; sectionList = loadSectionList(conn); puts("<FORM ACTION=\"/cgi-bin/gsidSubj\" NAME=\"mainForm\" METHOD=\"GET\">\n"); /* display GSID logo image here */ printf("<img src=\"/images/gsid_header.jpg\" alt=\"\" name=\"gsid_header\" width=\"800\" height=\"86\" border=\"1\" usemap=\"#gsid_headerMap\">"); hPrintf("<br><br>"); hotLinks(); printf("<font size=\"5\"><BR><B>Subject View </B></font>"); if (sameWord(curSubjId, "")) { printf("<BR><H3>Please enter a subject ID.\n"); printf("<input type=\"text\" name=\"hgs_subj\" value=\"%s\">\n", curSubjId); cgiMakeButton("submit", "Go!"); printf("</H3>"); printf("For example: GSID4123");fflush(stdout); } else { sqlSafef(query, sizeof(query), "select subjId from %s.gsidSubjInfo where subjId = '%s'", database, curSubjId); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); sqlFreeResult(&sr); if (row != NULL) { printf( " search for another subject: "); printf("<input type=\"text\" name=\"hgs_subj\" value=\"\">\n"); cgiMakeButton("submit", "Go!"); printSections(sectionList, conn, curSubjId); } else { printf("<H3><span style='color:red;'>%s</span> is not a valid subject ID.</H3>", curSubjId); printf("<H3>Please enter a valid subject ID.\n"); printf("<input type=\"text\" name=\"hgs_subj\" value=\"%s\">\n", ""); cgiMakeButton("submit", "Go!"); printf("<BR><BR>For example: GSID4123"); printf("</H3>"); fflush(stdout); } } puts("</FORM>\n"); }
void doH1n1Gene(struct trackDb *tdb, char *item) /* Show details page for H1N1 Genes and Regions annotations track. */ { struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char query[256]; char **row; char *chrom, *chromStart, *chromEnd; char *gene=NULL; genericHeader(tdb, item); gene = item; printf("<B>Gene: </B> %s\n<BR>", gene); sqlSafef(query, sizeof query, "select chrom, chromStart, chromEnd from h1n1Gene where name='%s';", gene); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { chrom = row[0]; chromStart = row[1]; chromEnd = row[2]; printPosOnChrom(chrom, atoi(chromStart), atoi(chromEnd), NULL, FALSE, item); } sqlFreeResult(&sr); hFreeConn(&conn); htmlHorizontalLine(); printf("<H3>Protein Structure Analysis and Prediction</H3>"); printf("<B>3D Structure Prediction of consensus sequence (with variations of all selected sequences highlighted):"); printf("<BR>PDB file:</B> "); char pdbUrl[PATH_LEN]; safef(pdbUrl, sizeof(pdbUrl), "%s/%s/decoys/%s.try1-opt3.pdb.gz", getH1n1StructUrl(), item, item); // Modeller stuff char modelPdbUrl[PATH_LEN]; if (getH1n1Model(gene, modelPdbUrl)) { char *selectFile = cartOptionalString(cart, gisaidAaSeqList); struct tempName imageFile, chimeraScript, chimerax; mkH1n1StructData(gene, selectFile, NULL, &imageFile, &chimeraScript); mkChimerax(gene, modelPdbUrl, chimeraScript.forCgi, &chimerax); printf("<A HREF=\"%s\" TARGET=_blank>%s</A>, view with <A HREF=\"%s\">Chimera</A><BR>\n", modelPdbUrl, gene, chimerax.forHtml); printf("<TABLE>\n"); printf("<TR>\n"); printf("<TD ALIGN=\"center\"><img src=\"%s\"></TD>", imageFile.forHtml); printf("</TR>\n"); printf("</TABLE>\n"); } htmlHorizontalLine(); printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); }
int main(int argc, char *argv[])
/* Usage: <database> <table> <outFile>.
 * For each row of <table>, splits the expScores column at commas and writes
 * "<name><TAB><index><TAB><score>" for every entry that is followed by a
 * comma.  Text after the last comma is not written. */
{
    struct sqlConnection *dbConn;
    struct sqlResult *result;
    char sql[256];
    char **fields;
    char *probeName;
    char *score, *comma;
    int scoreIx;
    FILE *out;

    if (argc != 4)
        usage();

    out = mustOpen(argv[3], "w");
    dbConn = hAllocConn(argv[1]);

    sqlSafef(sql, sizeof sql, "select name, expCount, expScores from %s", argv[2]);
    result = sqlMustGetResult(dbConn, sql);

    for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result)) {
        probeName = fields[0];
        score = fields[2];
        scoreIx = 0;
        for (comma = strstr(score, ","); comma != NULL; comma = strstr(score, ",")) {
            *comma = '\0';
            fprintf(out, "%s\t%d\t%s\n", probeName, scoreIx, score);
            score = comma + 1;
            scoreIx++;
        }
    }

    sqlFreeResult(&result);
    hFreeConn(&dbConn);
    fclose(out);
    return 0;
}
int main(int argc, char *argv[]) { char *database; char *outFn; struct dnaSeq *seq; struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; if (argc != 4) usage(); database = argv[1]; conn2= hAllocConn(database); outFn = argv[2]; outf = mustOpen(outFn, "w"); tgtChrom = argv[3]; sqlSafef(query2, sizeof query2, "select secStr, name, chrom, chromStart, chromEnd, strand from evofold where chrom='%s'", tgtChrom); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { secStr = row2[0]; id = row2[1]; chrom = row2[2]; chromStart = atoi(row2[3]); chromEnd = atoi(row2[4]); strand = *row2[5]; seq = hChromSeq(database, chrom, chromStart, chromEnd); touppers(seq->dna); if (strand == '-') reverseComplement(seq->dna, seq->size); memSwapChar(seq->dna, seq->size, 'T', 'U'); safef(javaCmd, sizeof(javaCmd), "java -cp VARNAv3-7.jar fr.orsay.lri.varna.applications.VARNAcmd -sequenceDBN %s -structureDBN '%s' -o evoFold/%s/%s.png", seq->dna, secStr, chrom, id); fprintf(outf, "%s\n", javaCmd); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); fclose(outf); hFreeConn(&conn2); return(0); }
struct pbStamp *getStampData(char *stampName) /* get data for a stamp */ { struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; struct pbStamp *pbStampPtr; int i; conn2= hAllocConn(database); safef(query2, sizeof(query2), "select * from %s.pbStamp where stampName ='%s'", database, stampName); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); pbStampPtr = pbStampLoad(row2); if (row2 == NULL) { errAbort("%s stamp data not found.", stampName); } sqlFreeResult(&sr2); safef(query2, sizeof(query2), "select * from %s.%s;", database, pbStampPtr->stampTable); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); i=0; while (row2 != NULL) { tx[i] = atof(row2[0]); ty[i] = atof(row2[1]); i++; row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); hFreeConn(&conn2); return(pbStampPtr); }
int searchProteinsInSupportedGenomes(char *queryID, char **database)
/* Search the genome databases flagged hgPbOk=1 in the central database for a
 * kgSpAlias entry whose alias equals queryID.
 *
 * Input:  queryID  - alias to look for
 * Output: *database - strdup()'d name of the last database that matched;
 *                     left untouched when nothing matches
 * Return: number of databases with at least one match */
{
    int matchCount = 0;
    char aliasCond[255];
    char centralSql[256];
    struct sqlConnection *centralConn;
    struct sqlConnection *genomeConn;
    struct sqlResult *centralResult;
    char **dbRow;
    char *genomeDb;
    char *hitCount;

    /* get all genome DBs that support PB */
    centralConn = hConnectCentral();
    sqlSafef(centralSql, sizeof(centralSql),
             "select defaultDb.name, dbDb.organism from dbDb,defaultDb where hgPbOk=1 and defaultDb.name=dbDb.name");
    centralResult = sqlMustGetResult(centralConn, centralSql);

    /* go through each valid genome database that has PB */
    for (dbRow = sqlNextRow(centralResult); dbRow != NULL; dbRow = sqlNextRow(centralResult)) {
        genomeDb = dbRow[0];

        genomeConn = sqlConnect(genomeDb);
        sqlSafefFrag(aliasCond, sizeof(aliasCond), "alias='%s'", queryID);
        hitCount = sqlGetField(genomeDb, "kgSpAlias", "count(distinct spID)", aliasCond);
        sqlDisconnect(&genomeConn);

        if (hitCount == NULL || sameWord(hitCount, "0"))
            continue;

        /* Count each database once only: splice variants added to kgSpAlias
         * give a count of 2 (parent plus variant), which caused a problem
         * when the rescale button was pressed. */
        if (atoi(hitCount) > 0) {
            matchCount++;
        }
        *database = strdup(genomeDb);
    }

    sqlFreeResult(&centralResult);
    hDisconnectCentral(&centralConn);
    return matchCount;
}
/* check if a locusID points to a KG mRNA */ boolean checkMrna(char *locusID) { struct sqlConnection *conn, *conn2; char query2[256]; struct sqlResult *sr2; char **row2; boolean result; char cond_str[256]; char *chp; char *gbAC; char *gbID; char *knownGeneID; conn = hAllocConn(); conn2 = hAllocConn(); result = FALSE; sqlSafef(query2, sizeof query2, "select gbAC from %s.locus2Acc0 where locusID=%s and seqType='m';", tempDbName, locusID); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { gbAC = row2[0]; gbID = strdup(gbAC); chp = strstr(gbID, "."); if (chp != NULL) *chp = '\0'; sqlSafefFrag(cond_str, sizeof cond_str, "name = '%s';", gbID); knownGeneID = sqlGetField(dbName, "knownGene", "name", cond_str); if (knownGeneID != NULL) { result=TRUE; break; } row2 = sqlNextRow(sr2); } hFreeConn(&conn); hFreeConn(&conn2); sqlFreeResult(&sr2); return(result); }
static void display1(struct sqlConnection *conn, char *sampleId, char* colName)
/* Print "<B>colName:</B> value<BR>" for the gisaidSubjInfo row whose
 * EPI_ISOLATE_ID equals sampleId.  Prints nothing when no row matches. */
{
    char sql[256];
    struct sqlResult *result;
    char **fields;

    safef(sql, sizeof(sql),
          "select %s from gisaidSubjInfo where EPI_ISOLATE_ID='%s'",
          colName, sampleId);
    result = sqlMustGetResult(conn, sql);

    fields = sqlNextRow(result);
    if (fields != NULL) {
        printf("<B>%s:</B> %s<BR>\n", colName, fields[0]);
        fflush(stdout);
    }

    sqlFreeResult(&result);
}
static struct genbankCds getCds(struct sqlConnection *conn, struct mappingInfo *mi) /* Get CDS, return empty genebankCds if not found or can't parse */ { char query[256]; struct sqlResult *sr; struct genbankCds cds; char **row; sqlSafef(query, sizeof(query), "select cds.name " "from %s.gbCdnaInfo, %s.cds " "where gbCdnaInfo.acc=\"%s\" and gbCdnaInfo.cds=cds.id", database, database, mi->gbAcc); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if ((row == NULL) || !genbankCdsParse(row[0], &cds)) ZeroVar(&cds); /* can't get or parse cds */ sqlFreeResult(&sr); return cds; }
static struct psl *loadAlign(struct sqlConnection *conn, struct mappingInfo *mi, int start) /* load a psl that must exist */ { char rootTable[256], table[256], query[256]; boolean hasBin; struct sqlResult *sr; char **row; struct psl *psl; if (mi->suffix == NULL) safef(rootTable, sizeof(rootTable), "%s%sAli", mi->tblPre, mi->geneSet); else safef(rootTable, sizeof(rootTable), "%s%sAli%s", mi->tblPre, mi->geneSet,mi->suffix); hFindSplitTable(database, seqName, rootTable, table, &hasBin); sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tStart = %d", table, mi->pg->name, start); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); psl = pslLoad(row+hasBin); sqlFreeResult(&sr); return psl; }
int main(int argc, char *argv[]) { char *database; char *outFn; struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; int iCnt; char *chp1, *chp2, *chp9; char *geneSymbol, *location; boolean questionable; if (argc != 3) usage(); database = argv[1]; conn2= hAllocConn(database); outFn = argv[2]; outf = mustOpen(outFn, "w"); sqlSafef(query2, sizeof query2, "select omimId, concat(disorders1,' ',disorders2, ' ',disorders3), geneSymbol, location from omimGeneMap where disorders1 <>''"); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { omimId = row2[0]; iCnt = 0; chp1 = row2[1]; geneSymbol = row2[2]; location = row2[3]; chp9 = strstr(chp1, ";"); while (chp9 != NULL) { questionable = FALSE; *chp9 = '\0'; while (*chp1 == ' ') chp1++; if (*chp1 == '?') { questionable = TRUE; chp1++; } chp2 = chp9; chp2--; while (*chp2 == ' ') { *chp2 = '\0'; chp2--; } fprintf(outf, "%s\t%s\t%s\t%s\t%d\n", chp1, geneSymbol, omimId, location,questionable); chp9++; chp1 = chp9; chp9 = strstr(chp1, ";"); } while (*chp1 == ' ') chp1++; chp2 = chp1 + strlen(chp1); chp2--; while (*chp2 == ' ') { *chp2 = '\0'; chp2--; } questionable = FALSE; if (*chp1 == '?') { questionable = TRUE; chp1++; } fprintf(outf, "%s\t%s\t%s\t%s\t%d\n", chp1, geneSymbol, omimId, location, questionable); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); fclose(outf); hFreeConn(&conn2); return(0); }
int main(int argc, char *argv[])
/* Usage: <database> <outFile>.
 * For each 'gene' row of rgdGeneRaw0, takes the rgdId text up to the first
 * ';', treats the second comma-separated field as the gene id, and writes
 * "<geneId><TAB><ref>" for every further comma-separated field.
 * Rows without a comma, or with nothing after the gene id, produce no output. */
{
    struct sqlConnection *conn;
    char query[512];
    struct sqlResult *sr;
    char **row;
    char *dataBase;
    char *chp;
    char *chp9;
    char *xrefStr;
    char *rgdGeneId;
    FILE *outf;
    char *outfileName;

    if (argc != 3)
        usage();

    dataBase = argv[1];
    outfileName = argv[2];
    outf = mustOpen(outfileName, "w");
    conn = hAllocConn(dataBase);

    sqlSafef(query, sizeof query,
             "select feature, rgdId from rgdGeneRaw0 where feature = 'gene'");
    sr = sqlMustGetResult(conn, query);

    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr)) {
        xrefStr = row[1];

        /* only the part before the first ';' is examined */
        chp9 = strstr(xrefStr, ";");
        if (chp9 != NULL)
            *chp9 = '\0';

        /* the gene id is whatever follows the first ','; a row without one is
         * skipped instead of advancing a NULL pointer */
        chp = strstr(xrefStr, ",");
        if (chp == NULL)
            continue;
        rgdGeneId = chp + 1;

        /* check if there are other references beside the RGD: entry */
        chp = strstr(rgdGeneId, ",");
        if (chp == NULL)
            continue;
        *chp = '\0';
        chp++;

        /* The original compared each comma position with a remembered ';'
         * position that was never reset between rows.  A comma can never sit
         * on the ';' of the current row, so that test was either dead or
         * acted on a stale pointer; the loop simply runs to the end. */
        for (chp9 = strstr(chp, ","); chp9 != NULL; chp9 = strstr(chp, ",")) {
            *chp9 = '\0';
            fprintf(outf, "%s\t%s\n", rgdGeneId, chp);
            chp = chp9 + 1;
        }
        /* print last entry */
        fprintf(outf, "%s\t%s\n", rgdGeneId, chp);
    }

    sqlFreeResult(&sr);
    hFreeConn(&conn);
    fclose(outf);
    return 0;
}
int getSuperfamilies2(char *proteinID) /* getSuperfamilies2() superceed getSuperfamilies() starting from hg16, it gets Superfamily data of a protein from ensemblXref3, sfAssign, and sfDes from the proteinsXXXXXX database, and placed them in arrays to be used by doSuperfamily().*/ { struct sqlConnection *conn, *conn2, *conn3; char query[MAXNAMELEN], query2[MAXNAMELEN]; struct sqlResult *sr, *sr2; char **row, **row2; char cond_str[255]; char *sfID, *seqID, *sfDesc, *region; int done; int j; char *chp, *chp2; int sfCnt; int int_start, int_end; if (!hTableExists(protDbName, "sfAssign")) return(0); if (!hTableExists(protDbName, "ensemblXref3")) return(0); conn = hAllocConn(database); conn2 = hAllocConn(database); conn3 = hAllocConn(database); sqlSafef(query2, sizeof(query), "select distinct sfID, seqID from %s.ensemblXref3 x, %s.sfAssign a where (swissAcc='%s' or tremblAcc='%s') and seqID=x.protein and protein != '' and evalue <= 0.02", protDbName, protDbName, proteinID, proteinID); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); sfCnt=0; while (row2 != NULL) { sfID = row2[0]; seqID= row2[1]; sqlSafef(query, sizeof(query), "select region from %s.sfAssign where sfID='%s' and seqID='%s' and evalue <=0.02", protDbName, sfID, seqID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); while (row != NULL) { region = row[0]; for (j=0; j<sfCnt; j++) { if (sfId[j] == atoi(sfID)) goto skip; } sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s;", sfID); sfDesc = sqlGetField(protDbName, "sfDes", "description", cond_str); /* !!! 
refine logic here later to be defensive against illegal syntax */ chp = region; done = 0; while (!done) { chp2 = strstr(chp, "-"); *chp2 = '\0'; chp2++; sscanf(chp, "%d", &int_start); chp = chp2; chp2 = strstr(chp, ","); if (chp2 != NULL) { *chp2 = '\0'; } else { done = 1; } chp2++; sscanf(chp, "%d", &int_end); sfId[sfCnt] = atoi(sfID); sfStart[sfCnt] = int_start; sfEnd[sfCnt] = int_end; strncpy(superfam_name[sfCnt], sfDesc, MAXNAMELEN-1); sfCnt++; chp = chp2; } skip: row = sqlNextRow(sr); } sqlFreeResult(&sr); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); hFreeConn(&conn); hFreeConn(&conn2); hFreeConn(&conn3); return(sfCnt); }
int getSuperfamilies(char *proteinID) /* preserved here for previous older genomes. Newer genomes should be using getSuperfamilies2(). 6/16/04 Fan*/ { struct sqlConnection *conn, *conn2; char query[MAXNAMELEN]; struct sqlResult *sr; char **row; char cond_str[255]; char *genomeID, *seqID, *modelID, *eValue, *sfID, *sfDesc; char *region; int done; char *ensPep; char *transcriptName; char *chp, *chp2; int ii = 0; int int_start, int_end; if (!hTableExists(database, "sfAssign")) return(0); conn = hAllocConn(database); conn2 = hAllocConn(database); if (hTableExists(database, "ensemblXref3")) { /* use ensemblXref3 for Ensembl data release after ensembl34d */ sqlSafefFrag(cond_str, sizeof(cond_str), "tremblAcc='%s'", proteinID); ensPep = sqlGetField(database, "ensemblXref3", "protein", cond_str); if (ensPep == NULL) { sqlSafefFrag(cond_str, sizeof(cond_str), "swissAcc='%s'", proteinID); ensPep = sqlGetField(database, "ensemblXref3", "protein", cond_str); if (ensPep == NULL) return(0); } } else { if (! 
(hTableExists(database, "ensemblXref") || hTableExists(database, "ensTranscript") ) ) return(0); /* two steps query needed because the recent Ensembl gene_xref 11/2003 table does not have valid translation_name */ sqlSafefFrag(cond_str, sizeof(cond_str), "external_name='%s'", protDisplayID); transcriptName = sqlGetField(database, "ensGeneXref", "transcript_name", cond_str); if (transcriptName == NULL) { return(0); } else { sqlSafefFrag(cond_str, sizeof(cond_str), "transcript_name='%s';", transcriptName); ensPep = sqlGetField(database, "ensTranscript", "translation_name", cond_str); if (ensPep == NULL) { hFreeConn(&conn); return(0); } } } ensPepName = ensPep; sqlSafef(query, sizeof(query), "select * from %s.sfAssign where seqID='%s' and evalue <= 0.02;", database, ensPep); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row == NULL) return(0); while (row != NULL) { genomeID = row[0]; seqID = row[1]; modelID = row[2]; region = row[3]; eValue = row[4]; sfID = row[5]; /* sfDesc = row[6]; */ /* !!! the recent Suprefamily sfAssign table does not have valid sf description */ sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s;", sfID); sfDesc = sqlGetField(database, "sfDes", "description", cond_str); /* !!! refine logic here later to be defensive against illegal syntax */ chp = region; done = 0; while (!done) { chp2 = strstr(chp, "-"); *chp2 = '\0'; chp2++; sscanf(chp, "%d", &int_start); chp = chp2; chp2 = strstr(chp, ","); if (chp2 != NULL) { *chp2 = '\0'; } else { done = 1; } chp2++; sscanf(chp, "%d", &int_end); sfId[ii] = atoi(sfID); sfStart[ii] = int_start; sfEnd[ii] = int_end; strncpy(superfam_name[ii], sfDesc, MAXNAMELEN-1); ii++; chp = chp2; } row = sqlNextRow(sr); } sqlFreeResult(&sr); hFreeConn(&conn); hFreeConn(&conn2); return(ii); }
int main(int argc, char *argv[])
/* Usage: <dbName>.
 * For every row of <dbName>Temp.locus2Ref0, finds the mRNA rows (seqType='m')
 * of the same locusID in <dbName>Temp.locus2Acc0 and writes
 * "<gbAC><TAB><refAC>" pairs, both without their ".version" suffix, to j.dat.
 * j.dat is then sorted and de-duplicated into mrnaRefseq.tab and removed.
 * Returns 1 when j.dat cannot be opened. */
{
    struct sqlConnection *conn, *conn2;
    char query[256], query2[256];
    struct sqlResult *sr, *sr2;
    char **row, **row2;
    char *chp;
    FILE *o1;

    char *gbAC;      /* GenBank accession.version */
    char *locusID2;  /* LocusLink ID */
    char *refAC;     /* Refseq accession.version */
    char *dbName;

    if (argc != 2)
        usage();
    dbName = argv[1];

    o1 = fopen("j.dat", "w");
    if (o1 == NULL) {
        fprintf(stderr, "can't open j.dat for writing\n");
        return(1);
    }

    conn  = hAllocConn(dbName);
    conn2 = hAllocConn(dbName);

    sqlSafef(query2, sizeof query2, "select * from %sTemp.locus2Ref0;", dbName);
    sr2 = sqlMustGetResult(conn2, query2);

    for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2)) {
        locusID2 = row2[0];
        refAC    = row2[1];

        /* drop the ".version" suffix of the RefSeq accession */
        chp = strstr(refAC, ".");
        if (chp != NULL)
            *chp = '\0';

        sqlSafef(query, sizeof query,
                 "select * from %sTemp.locus2Acc0 where locusID=%s and seqType='m';",
                 dbName, locusID2);
        sr = sqlMustGetResult(conn, query);
        for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr)) {
            gbAC = row[1];
            chp = strstr(gbAC, ".");
            if (chp != NULL)
                *chp = '\0';
            fprintf(o1, "%s\t%s\n", gbAC, refAC);
        }
        /* release the per-locus result; the original never freed it */
        sqlFreeResult(&sr);
    }

    fclose(o1);
    /* release the result before handing its connection back */
    sqlFreeResult(&sr2);
    hFreeConn(&conn);
    hFreeConn(&conn2);

    mustSystem("cat j.dat|sort|uniq >mrnaRefseq.tab");
    printf("mrnaRefseq.tab created.\n");
    mustSystem("rm j.dat");
    return(0);
}
void processAlign(char *kgTempDb, char *spDb, char *alignID, int cdsCnt, FILE *outf) { struct sqlConnection *conn2, *conn3, *conn4; char query2[256], query3[256]; struct sqlResult *sr2, *sr3; char **row2, **row3; char *score; char *chrom; char *protAcc; char *mrnaID; char *ranking; int protDbId; char condStr[255]; int i; char *chp; char *isCurated; conn2= hAllocConn(kgTempDb); conn3= hAllocConn(kgTempDb); conn4= hAllocConn(kgTempDb); sqlSafef(query2, sizeof(query2), "select * from %s.kgCandidate where alignID='%s'", kgTempDb, alignID); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { mrnaID = row2[0]; chrom = row2[1]; ranking = row2[11]; /* check if it is a composite mrnaID */ /* if yes, select from entries with both protein and mrna specified */ if (alignID[0] == 'U') { chp = strstr(row2[0], "_"); *chp = '\0'; protAcc = row2[0]; chp ++; mrnaID = chp; sqlSafef(query3, sizeof(query3), "select protAcc, score from %s.protMrnaScore where mrnaAcc='%s' and protAcc='%s'", kgTempDb, mrnaID, protAcc); } else { sqlSafef(query3, sizeof(query3), "select protAcc, score from %s.protMrnaScore where mrnaAcc='%s' order by score desc", kgTempDb, mrnaID); } sr3 = sqlMustGetResult(conn3, query3); row3 = sqlNextRow(sr3); while(row3 != NULL) { protAcc = row3[0]; score = row3[1]; chp = strstr(protAcc, "-"); if (chp == NULL) { sqlSafefFrag(condStr, sizeof(condStr), "acc='%s'", protAcc); isCurated = sqlGetField(spDb, "info", "isCurated", condStr); if (sameWord(isCurated, "1")) { protDbId = 1; } else { protDbId = 2; } } else { protDbId = 4; } fprintf(outf, "%s:", chrom); for (i=0; i<cdsCnt; i++) fprintf(outf, "%s", cdsBloc[i]); fprintf(outf, "\t%s\t%d\t%8s\t%s\t%s\t%s\n", ranking, protDbId, score, mrnaID, protAcc, alignID); /* for composite type, process just one record */ if (alignID[0] == 'U') break; row3 = sqlNextRow(sr3); } sqlFreeResult(&sr3); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); hFreeConn(&conn2); hFreeConn(&conn3); hFreeConn(&conn4); }
int main(int argc, char *argv[])
/* Usage: <interProXrefTable> <outFile>.
 * For every distinct interProId in proteome.<table>, finds the entry with the
 * longest start..end span, fetches that stretch of the protein sequence from
 * uniProt.protein and, when the span is at least 18 residues, writes it as a
 * FASTA record ">interProId description".
 *
 * Exits with status 1 when a query does not fit its buffer or when the
 * protein of the longest entry is missing. */
{
    struct sqlConnection *conn2, *conn3, *conn4;
    char query2[256], query3[256], query4[256];
    struct sqlResult *sr2, *sr3, *sr4;
    char **row2, **row3, **row4;

    char *aaSeq;
    FILE *outFile;
    char *outFileName;
    char *tableName;
    char *interProId;
    int maxLen, len;
    int n;
    char *maxAcc, *maxStart, *maxDesc;

    if (argc != 3)
        usage();
    tableName   = argv[1];
    outFileName = argv[2];
    outFile = mustOpen(outFileName, "w");

    conn2 = hAllocConn();
    conn3 = hAllocConn();
    conn4 = hAllocConn();

    /* loop over all InterPro entry for the specific InterPro xref table for this organism */
    /* snprintf bounds every query; the table name comes straight from argv */
    n = snprintf(query2, sizeof(query2),
                 "select distinct interProId from proteome.%s", tableName);
    if (n < 0 || n >= (int)sizeof(query2)) {
        fprintf(stderr, "query too long for table %s, exiting ...\n", tableName);
        exit(1);
    }
    sr2 = sqlMustGetResult(conn2, query2);

    for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2)) {
        interProId = row2[0];

        /* get all start/end positions of this InterPro domain */
        n = snprintf(query3, sizeof(query3),
                     "select accession, start, end, description from proteome.%s where interProId='%s'",
                     tableName, interProId);
        if (n < 0 || n >= (int)sizeof(query3)) {
            fprintf(stderr, "query too long for %s, exiting ...\n", interProId);
            exit(1);
        }
        sr3 = sqlMustGetResult(conn3, query3);

        /* start each domain with a clean slate so nothing carries over from
         * the previous interProId */
        maxLen = 0;
        maxAcc = maxStart = maxDesc = NULL;
        for (row3 = sqlNextRow(sr3); row3 != NULL; row3 = sqlNextRow(sr3)) {
            len = atoi(row3[2]) - atoi(row3[1]) + 1;
            /* remember the max len, so far */
            if (len > maxLen) {
                maxLen   = len;
                maxAcc   = cloneString(row3[0]);
                maxStart = cloneString(row3[1]);
                maxDesc  = cloneString(row3[3]);
            }
        }
        sqlFreeResult(&sr3);

        /* no entry with a positive length: nothing to fetch for this domain */
        if (maxAcc == NULL)
            continue;

        /* fetch the corresponding AA sequence of the domain having the max length */
        n = snprintf(query4, sizeof(query4),
                     "select substring(val, %s, %d) from uniProt.protein where acc='%s'",
                     maxStart, maxLen, maxAcc);
        if (n < 0 || n >= (int)sizeof(query4)) {
            fprintf(stderr, "query too long for %s, exiting ...\n", maxAcc);
            exit(1);
        }
        sr4 = sqlMustGetResult(conn4, query4);
        row4 = sqlNextRow(sr4);
        if (row4 == NULL) {
            fprintf(stderr, "%s %s missing, exiting ...\n", maxAcc, interProId);
            exit(1);
        }
        aaSeq = row4[0];
        if (maxLen >= 18) {
            fprintf(outFile, ">%s %s\n", interProId, maxDesc);
            fprintf(outFile, "%s\n", aaSeq);
        }
        sqlFreeResult(&sr4);
    }

    sqlFreeResult(&sr2);
    hFreeConn(&conn2);
    hFreeConn(&conn3);
    hFreeConn(&conn4);
    fclose(outFile);
    return(0);
}
static void gadPrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print out GAD section. */ { int refPrinted = 0; boolean showCompleteGadList; char condStr[256]; char query[256]; struct sqlResult *sr; char **row; struct dyString *currentCgiUrl; char *upperDisease; char *url = cloneString("http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=gene="); char *itemName; if (url != NULL && url[0] != 0) { safef(condStr, sizeof(condStr), "k.kgId='%s' and k.geneSymbol = g.geneSymbol", geneId); itemName = sqlGetField(database, "kgXref k, gadAll g", "k.geneSymbol", condStr); showCompleteGadList = FALSE; if (cgiOptionalString("showAllRef") != NULL) { if (sameWord(cgiOptionalString("showAllRef"), "Y") || sameWord(cgiOptionalString("showAllRef"), "y") ) { showCompleteGadList = TRUE; } } currentCgiUrl = cgiUrlString(); printf("<B>Genetic Association Database: "); printf("<A HREF=\"%s'%s'\" target=_blank>", url, itemName); printf("%s</B></A>\n", itemName); printf("<BR><B>CDC HuGE Published Literature: "); printf("<A HREF=\"%s%s%s\" target=_blank>", "http://hugenavigator.net/HuGENavigator/searchSummary.do?firstQuery=", itemName, "&publitSearchType=now&whichContinue=firststart&check=n&dbType=publit&Mysubmit=go"); printf("%s</B></A>\n", itemName); /* List diseases associated with the gene */ safef(query, sizeof(query), "select distinct broadPhen from gadAll where geneSymbol='%s' and association = 'Y' order by broadPhen", itemName); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { upperDisease = replaceChars(row[0], "'", "''"); touppers(upperDisease); printf("<BR><B>Positive Disease Associations: </B>"); printf("<A HREF=\"%s%s%s%s%s\" target=_blank>", "http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=upper(DISEASE)%20like%20'%25", cgiEncode(upperDisease), "%25'%20AND%20upper(GENE)%20%20like%20'%25", itemName, "%25'"); printf("%s</B></A>\n", row[0]); row = sqlNextRow(sr); } while 
(row != NULL) { upperDisease = replaceChars(row[0], "'", "''"); touppers(upperDisease); printf(", <A HREF=\"%s%s%s%s%s\" target=_blank>", "http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=upper(DISEASE)%20like%20'%25", cgiEncode(upperDisease), "%25'%20AND%20upper(GENE)%20%20like%20'%25", itemName, "%25'"); printf("%s</B></A>\n", row[0]); row = sqlNextRow(sr); } sqlFreeResult(&sr); refPrinted = 0; safef(query, sizeof(query), "select broadPhen,reference,title,journal, pubMed, conclusion from gadAll where geneSymbol='%s' and association = 'Y' order by broadPhen", itemName); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) printf("<BR><B>Related Studies: </B><OL>"); while (row != NULL) { printf("<LI><B>%s </B>", row[0]); printf("<br>%s, %s, %s.\n", row[1], row[2], row[3]); if (!sameWord(row[4], "")) { printf(" [PubMed "); printf("<A HREF=\"%s%s%s'\" target=_blank>", "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=Abstract&list_uids=", row[4],"&query_hl=1&itool=genome.ucsc.edu"); printf("%s</B></A>]\n", row[4]); } printf("<br><i>%s</i>\n", row[5]); printf("</LI>\n"); refPrinted++; if ((!showCompleteGadList) && (refPrinted >= 3)) break; row = sqlNextRow(sr); } sqlFreeResult(&sr); printf("</OL>"); if ((!showCompleteGadList) && (row != NULL)) { printf("<B>   more ... </B>"); printf( "<A HREF=\"%s?showAllRef=Y&%s#gad\">click here to view the complete list</A> ", "hgGene", currentCgiUrl->string); } } }
void gsidMsa(char *database, char *table, char *baseAcc, int startPos, char *outWigFn, char *outConsFn) { struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; FILE *outf, *outf2; char base; int ii; int i = 0; int j,jj,k; int seqCnt = 0; int max, kmax, kmax2; conn2= hAllocConn(database); outf = mustOpen(outWigFn, "w"); sqlSafef(query2, sizeof query2, "select seq from %s.%s where id='%s'", database, table, baseAcc); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); baseGenomeSeq = cloneString(row2[0]); baseSeqLen=strlen(baseGenomeSeq); sqlFreeResult(&sr2); sqlSafef(query2, sizeof query2, "select * from %s.%s", database, table); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); ii=0; while (row2 != NULL) { strcpy(seqId[ii], row2[0]); strcpy(seq[ii], row2[1]); ii++; row2 = sqlNextRow(sr2); } seqCnt = ii; sqlFreeResult(&sr2); hFreeConn(&conn2); /* print header */ fprintf(outf, "browser position chr1:1-9000\n"); fprintf(outf, "track type=wiggle_0\n"); fprintf(outf, "variableStep chrom=chr1\n"); jj=0; for (j=0; j<baseSeqLen; j++) { for (k=0; k<MAXBASE; k++) cnt[i][k] = 0; for (i=0; i<seqCnt; i++) { for (k=0; k<MAXBASE; k++) { base = toupper(seq[i][j]); if (base == refBase[k]) { cnt[j][k]++; } } } max = 0; kmax = 0; kmax2= 0; for (k=0; k<MAXBASE; k++) { if (cnt[j][k] > max) { max = cnt[j][k]; /* keep track of the 2nd hightest */ kmax2 = kmax; kmax = k; } } consensusSeq[j] = refBase[kmax]; if (refBase[kmax] == '-') { consensusSeq2[j] = refBase2[kmax2]; } else { consensusSeq2[j] = refBase[kmax]; } aliSeq[j] = refBase[kmax]; identity[j] = (float)max/(float)seqCnt; if (baseGenomeSeq[j] != '-') { fprintf(outf, "%d %f\n", startPos+jj, identity[j]); jj++; } } fclose(outf); consensusSeq[baseSeqLen] = '\0'; consensusSeq2[baseSeqLen] = '\0'; outf2 = mustOpen(outConsFn, "w"); fprintf(outf2, ">%s MSA Consensus Sequence\n", table); fprintf(outf2, "%s\n", consensusSeq2); fclose(outf2); }
int main(int argc, char *argv[])
/* Write allPep.tab: one tab-separated line per row of biosql<date>.bioentry,
 * holding bioentry_id, biodatabase_id, display_id, accession, division,
 * gene names (always empty here), description, the parenthesized tail of the
 * description, and the sequence string.
 *   argv[1] - date suffix appended to "biosql" to form the database name
 * Returns 0 on success, 1 if the output file cannot be created. */
{
struct sqlConnection *conn2, *conn3, *conn4;
char query2[256], query3[256], query4[256];
struct sqlResult *sr2, *sr3, *sr4;
char **row2, **row3, **row4;
FILE *o3;
char *chp;
char *proteinDataDate;
int maxlen = 0;
int len;
char *bioentry_id;
char *biodatabase_id;
char *display_id;
char *accession;
char *division;
char *biosequence_str;
char *desc, *desc2;
char *genenames;
char *ontology_term_id;
char *qualifier_value;

if (argc != 2) usage();
proteinDataDate = argv[1];

conn2 = hAllocConn();
conn3 = hAllocConn();
conn4 = hAllocConn();

o3 = fopen("allPep.tab", "w");
if (o3 == NULL)
    {
    fprintf(stderr, "ERROR: Can not open output file: allPep.tab\n");
    return(1);
    }

sqlSafef(query3, sizeof query3, "select * from biosql%s.bioentry;", proteinDataDate);
sr3 = sqlMustGetResult(conn3, query3);
while ((row3 = sqlNextRow(sr3)) != NULL)
    {
    bioentry_id    = row3[0];
    biodatabase_id = row3[1];
    display_id     = row3[2];
    accession      = row3[3];
    division       = row3[5];

    /* Default to an empty sequence: the original printed an uninitialized
     * pointer whenever a bioentry had no biosequence row. */
    biosequence_str = "";
    sqlSafef(query2, sizeof query2, "select * from biosql%s.biosequence where bioentry_id='%s';",
             proteinDataDate, bioentry_id);
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    if (row2 != NULL)
        {
        biosequence_str = row2[4];
        len = strlen(biosequence_str);
        if (maxlen < len) maxlen = len;
        }

    sqlSafef(query4, sizeof query4,
             "select * from biosql%s.bioentry_qualifier_value where bioentry_id='%s';",
             proteinDataDate, bioentry_id);
    genenames = "";
    desc  = "";
    desc2 = "";
    sr4 = sqlMustGetResult(conn4, query4);
    /* Only the first qualifier row is examined.
     * NOTE(review): if the term "10" row is not first, the description stays
     * empty - confirm that looking at a single row is intended. */
    row4 = sqlNextRow(sr4);
    if (row4 != NULL)
        {
        ontology_term_id = row4[1];
        qualifier_value  = row4[2];
        if (strcmp(ontology_term_id, "10") == 0)
            desc = qualifier_value;

        /* Split "text (rest" into desc = "text" and desc2 = "(rest" by
         * overwriting the character in front of the parenthesis. */
        chp = strstr(desc, "(");
        if (chp != NULL)
            {
            if (chp > desc)
                {
                *(chp - 1) = '\0';
                desc2 = chp;
                }
            else
                {
                /* '(' is the very first character: there is nothing in front
                 * of it to overwrite (the original wrote before the buffer). */
                desc2 = chp;
                desc = "";
                }
            }
        }

    /* Must be written before the result sets that own these strings go away. */
    fprintf(o3, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
            bioentry_id, biodatabase_id, display_id, accession, division,
            genenames, desc, desc2, biosequence_str);

    sqlFreeResult(&sr2);
    sqlFreeResult(&sr4);
    }
//fprintf(stderr, "Max AA length = %d\n", maxlen);

/* Release the result before the connection it belongs to, and do not forget
 * the third connection. */
sqlFreeResult(&sr3);
hFreeConn(&conn2);
hFreeConn(&conn3);
hFreeConn(&conn4);

fclose(o3);
return(0);
}
int main(int argc, char *argv[]) { FILE *inf; char *mrnaDate; int months; char dirName[PATH_MAX]; struct sqlConnection *conn, *conn3; char query[256]; struct sqlResult *sr; char **row; char *protAcc, *mrnaAcc, *matchStr; char *protSizeStr, *mrnaSizeStr; int protSize, mrnaSize, match; char *protMrnaTableName; char condStr[255]; int score; if (argc != 5) usage(); proteinDataDate = argv[1]; kgTempDb = argv[2]; genomeReadOnly = argv[3]; protMrnaTableName = argv[4]; sprintf(spDB, "sp%s", proteinDataDate); sprintf(proteinsDB, "proteins%s", proteinDataDate); sprintf(gbTempDB, "%sTemp", kgTempDb); inf = fopen("protein.lis", "r"); if ((FILE *) NULL == inf) errAbort("ERROR: Can not open input file: protein.lis"); o3 = fopen("kgBestMrna.out", "w"); if ((FILE *) NULL == o3) errAbort("ERROR: Can not open output file: kgBestMrna.out"); o7 = fopen("best.lis", "w"); if ((FILE *) NULL == o7) errAbort("ERROR: Can not open output file: best.lis"); conn = hAllocConn(genomeReadOnly); conn3= hAllocConn(genomeReadOnly); proteinCount = 0; snprintf(dirName, (size_t) sizeof(dirName), "%s", "./clusterRun" ); sqlSafef(query, sizeof query,"select qName, tName, matches, qSize, tSize from %s.%s", kgTempDb, protMrnaTableName); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); while (row != NULL) { protAcc = row[0]; mrnaAcc = row[1]; matchStr = row[2]; protSizeStr = row[3]; mrnaSizeStr = row[4]; sscanf(matchStr, "%d", &match); sscanf(protSizeStr, "%d", &protSize); sscanf(mrnaSizeStr, "%d", &mrnaSize); sscanf(matchStr, "%d", &match); if ((float)match/(float)protSize > 0.3) { sqlSafefFrag(cond_str, sizeof cond_str, "acc='%s'", mrnaAcc); mrnaDate = sqlGetField(genomeReadOnly, "gbCdnaInfo", "moddate", condStr); if (mrnaDate != NULL) { months = cal_months(mrnaDate); score = mrnaSize + months*2 - (protSize - match) *50; printf("%s\t%s\t%d\n", protAcc, mrnaAcc, score);fflush(stdout); } } row = sqlNextRow(sr); } hFreeConn(&conn); hFreeConn(&conn3); fclose(o3); fclose(o7); return(0); }
/*
 *  There are a variety of conditions that affect how wigFetchData is
 *  going to work.  This is an attempt to allow it to do as much
 *  as possible, but not get overloaded.
 *
 *  summaryOnly is used when whole-chrom summaries are requested for
 *  statistics purposes.  In those cases we do not need to go all the
 *  way to the data to get the averages; the SQL rows are good enough.
 *  Even on this level there is quite a bit of work to do on tracks
 *  such as Quality that have 180,000 rows on just chr1.
 *
 *  A wiggleStats wsList is given when doing statistics.  For a pure
 *  data fetch there is no need for wiggleStats and it is a NULL pointer.
 *
 *  A bedList pointer is given when a returned bed list is desired.
 *  When building a bedList, the number of bed elements is limited by
 *  maxBedElements.
 *
 *  If we are not returning a bedList and we are not doing a stats
 *  summary, then we have an honest data fetch operation, and in this
 *  case maxBedElements is honored as a limit on the amount of data.
 *  When the caller of such a fetch passes maxBedElements == 0, all data
 *  that can be found is returned.  This would be the case for a stats
 *  operation when only one chrom is being worked on.
 */
struct wiggleData *wigFetchData(char *db, char *table, char *chromName,
    int winStart, int winEnd, boolean summaryOnly, boolean freeData,
    int tableId, boolean (*wiggleCompare)(int tableId, double value,
        boolean summaryOnly, struct wiggle *wiggle),
    char *constraints, struct bed **bedList,
    unsigned maxBedElements, struct wiggleStats **wsList)
/* return linked list of wiggle data between winStart, winEnd.
 * Returns NULL when the table does not exist.  When bedList is not NULL, bed
 * elements are added to *bedList; when wsList is not NULL, statistics are
 * handed to returnStats() once per span.  The returned list can be freed by
 * wigFreeData. */
{
struct sqlConnection *conn = hAllocConn(db);
struct sqlResult *sr;
char **row;
int rowOffset;
struct wiggle *wiggle;
struct hash *spans = NULL;      /* List of spans encountered during load */
char spanName[128];
char whereSpan[128];
char query[256];
struct hashEl *el;
int span = 0;
struct hashCookie cookie;
struct wiggleData *wigData = (struct wiggleData *) NULL;
struct wiggleData *wdList = (struct wiggleData *) NULL;
boolean bewareConstraints = FALSE;
boolean createBedList = FALSE;
boolean firstSpanDone = FALSE;
unsigned dataLimit = 0;
unsigned dataDone = 0;
boolean reachedDataLimit = FALSE;

/* make sure table exists before we try to talk to it.
 * If it does not exist, we return a null result */
if (! sqlTableExists(conn, table))
    {
    hFreeConn(&conn);
    return((struct wiggleData *)NULL);
    }

if ((struct bed **)NULL != bedList)
    createBedList = TRUE;

/* if we are not doing a summary (== return all data) and
 * we are not creating a bed list, then obey the limit requested.
 * It will be zero if they really want everything. */
if (!summaryOnly && !createBedList)
    dataLimit = maxBedElements;

spans = newHash(0);             /* a listing of all spans found here */

resetStats(&wigStatsAcc);       /* zero everything */

/* Are the constraints going to interfere with our span search ? */
if (constraints)
    {
    char *c = cloneString(constraints);
    tolowers(c);
    if (stringIn("span",c))
        bewareConstraints = TRUE;
    freeMem(c);                 /* the lower-cased copy used to be leaked */
    }

/* NOTE(review): snprintf truncates silently; a long constraints string can
 * produce a cut-off query here and in whereSpan below. */
if (bewareConstraints)
    snprintf(query, sizeof(query),
        "SELECT span from %s where chrom = '%s' AND %s group by span",
        table, chromName, constraints );
else
    snprintf(query, sizeof(query),
        "SELECT span from %s where chrom = '%s' group by span",
        table, chromName );

/* Survey the spans to see what the story is here */
sr = sqlMustGetResult(conn,query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    unsigned surveyedSpan = sqlUnsigned(row[0]);

    snprintf(spanName, sizeof(spanName), "%u", surveyedSpan);
    if (hashLookup(spans, spanName) == NULL)
        hashAddInt(spans, spanName, surveyedSpan);
    }
sqlFreeResult(&sr);

/* Now, using that span list, go through each span, fetching data */
cookie = hashFirst(spans);
while ((! reachedDataLimit) && (el = hashNext(&cookie)) != NULL)
    {
    /* Hand over the statistics accumulated for the previous span (span still
     * holds its value) before starting on the next one. */
    if ((struct wiggleStats **)NULL != wsList)
        returnStats(&wigStatsAcc,wsList,chromName,winStart,winEnd,span);

    resetStats(&wigStatsAcc);

    if (bewareConstraints)
        snprintf(whereSpan, sizeof(whereSpan), "((span = %s) AND %s)",
            el->name, constraints);
    else
        snprintf(whereSpan, sizeof(whereSpan), "span = %s", el->name);

    span = atoi(el->name);

    sr = hOrderedRangeQuery(conn, table, chromName, winStart, winEnd,
        whereSpan, &rowOffset);

    while ((! reachedDataLimit) && (row = sqlNextRow(sr)) != NULL)
        {
        wiggle = wiggleLoad(row + rowOffset);
        /* NOTE(review): wiggle is not released in this function - confirm
         * whether wigReadDataRow keeps references into it. */
        if (wiggle->count > 0)
            {
            wigData = wigReadDataRow(wiggle, winStart, winEnd,
                    tableId, summaryOnly, wiggleCompare );
            if (wigData)
                {
                /* Bed elements are only collected while on the first span. */
                if (firstSpanDone)
                    accumStats(&wigStatsAcc, wigData,
                        (struct bed **)NULL, maxBedElements, table);
                else
                    accumStats(&wigStatsAcc, wigData, bedList,
                        maxBedElements, table);
                dataDone += wigData->count;
                if (freeData)
                    {
                    freeMem(wigData->data); /* and mark it gone */
                    wigData->data = (struct wiggleDatum *)NULL;
                    }
                slAddHead(&wdList,wigData);
                if (!createBedList && dataLimit && (dataLimit < dataDone))
                    reachedDataLimit = TRUE;
                if (createBedList &&
                        (wigStatsAcc.bedElCount > maxBedElements))
                    reachedDataLimit = TRUE;
                }
            }
        }
    /* perhaps last bed line */
    if (!firstSpanDone && createBedList &&
        (wigStatsAcc.bedElEnd > wigStatsAcc.bedElStart) && wigData)
        {
        struct bed *bedEl;
        bedEl = bedElement(wigData->chrom,
                wigStatsAcc.bedElStart, wigStatsAcc.bedElEnd,
                table, ++wigStatsAcc.bedElCount);
        slAddHead(bedList, bedEl);
        }
    sqlFreeResult(&sr);
    firstSpanDone = TRUE;
    }
closeWibFile();

/* NOTE(review): the spans hash is never released here; it should be freed
 * with the hash library's free routine once the loop is done. */

if (createBedList)
    slReverse(bedList);

/* last stats calculation */
if ((struct wiggleStats **)NULL != wsList)
    returnStats(&wigStatsAcc,wsList,chromName,winStart,winEnd,span);

hFreeConn(&conn);

if (wdList != (struct wiggleData *)NULL)
    slReverse(&wdList);

/* this wdList can be freed by wigFreeData */
return(wdList);
}       /* struct wiggleData *wigFetchData() */
int spanInUse(struct sqlConnection *conn, char *table, char *chrom, int winStart, int winEnd, struct cart *cart) /* determine span used during hgTracks display, * winEnd == 0 means whole chrom */ { struct sqlResult *sr; char query[256]; char **row; float basesPerPixel = 0.0; int spanInUse = 0; struct hashCookie cookie; int insideWidth; int minSpan = BIGNUM; int maxSpan = 0; int spanCount = 0; struct hash *spans = newHash(0); /* list of spans in this table */ struct hashEl *el; int insideX = hgDefaultGfxBorder; int pixWidth = atoi(cartUsualString(cart, "pix", DEFAULT_PIX_WIDTH )); boolean withLeftLabels = cartUsualBoolean(cart, "leftLabels", TRUE); /* winEnd less than 1 (i.e. == 0), we need to find this chrom size */ if (winEnd < 1) { safef(query, ArraySize(query), "SELECT size from chromInfo where chrom = '%s'", chrom); sr = sqlMustGetResult(conn,query); if ((row = sqlNextRow(sr)) == NULL) errAbort("spanInUse: query failed: '%s'\n", query); winEnd = sqlUnsigned(row[0]); sqlFreeResult(&sr); if (winEnd < 1) errAbort("spanInUse: failed to find valid chrom size via query: '%s'\n", query); } /* This is a time expensive query, * ~3 to 6 seconds on large chroms full of data */ safef(query, ArraySize(query), "SELECT span from %s where chrom = '%s' group by span", table, chrom); sr = sqlMustGetResult(conn,query); while ((row = sqlNextRow(sr)) != NULL) { char spanName[128]; unsigned span = sqlUnsigned(row[0]); safef(spanName, ArraySize(spanName), "%u", span); el = hashLookup(spans, spanName); if ( el == NULL) { if (span > maxSpan) maxSpan = span; if (span < minSpan) minSpan = span; ++spanCount; hashAddInt(spans, spanName, span); } } sqlFreeResult(&sr); spanInUse = minSpan; if (withLeftLabels) insideX += hgDefaultLeftLabelWidth + hgDefaultGfxBorder; insideWidth = pixWidth - insideX - hgDefaultGfxBorder; basesPerPixel = (winEnd - winStart) / insideWidth; cookie = hashFirst(spans); while ((el = hashNext(&cookie)) != NULL) { int span = sqlSigned(el->name); if ((float) span <= 
basesPerPixel) spanInUse = span; } return spanInUse; } /* int spanInUse() */
int main(int argc, char *argv[]) { struct sqlConnection *conn, *conn2; char query2[256]; struct sqlResult *sr2; char **row2; char cond_str[255]; char *proteinDatabaseName; FILE *o1, *o2, *o3; FILE *fh[23]; char temp_str[1000];; char *accession; char *aaSeq; char *chp; int i, j, len; int ihi, ilow; char *answer; char *protDisplayId; int aaResCnt[30]; char aaAlphabet[30]; int aaResFound; float fvalue1, fvalue2; float p1, p2; int icnt, jcnt; char *taxon; char *database; int sortedCnt; if (argc != 4) usage(); strcpy(aaAlphabet, "WCMHYNFIDQKRTVPGEASLXZB"); proteinDatabaseName = argv[1]; taxon = argv[2]; database = argv[3]; o2 = mustOpen("pbResAvgStd.tab", "w"); for (i=0; i<20; i++) { safef(temp_str, sizeof(temp_str), "%c.txt", aaAlphabet[i]); fh[i] = mustOpen(temp_str, "w"); } conn = hAllocConn(hDefaultDb()); conn2 = hAllocConn(hDefaultDb()); safef(query2, sizeof(query2), "select proteinID from %s.knownGene;", database); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); icnt = 0; jcnt = 0; for (j=0; j<MAXRES; j++) { sumJ[j] = 0; } while (row2 != NULL) { protDisplayId = row2[0]; safef(cond_str, sizeof(cond_str), "val='%s'", protDisplayId); accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str); if (accession == NULL) { safef(cond_str, sizeof(cond_str), "acc='%s'", protDisplayId); accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str); if (accession == NULL) { verbose(2, "'%s' not found.\n", protDisplayId); goto skip; } } safef(cond_str, sizeof(cond_str), "accession='%s'", accession); answer = sqlGetField("proteins040115", "spXref2", "biodatabaseID", cond_str); if (answer == NULL) { /* this protein might be a variant splice protein, and then it won't be in spXref2 */ goto skip; } if (answer[0] != '1') { /* printf("%s not in SWISS-PROT\n", protDisplayId);fflush(stdout); */ goto skip; } safef(cond_str, sizeof(cond_str), "acc='%s'", accession); aaSeq = sqlGetField(proteinDatabaseName, "protein", "val", cond_str); if (aaSeq 
== NULL) { printf("Can't find peptide sequence for %s, exiting ...\n", protDisplayId); fflush(stdout); exit(1); } len = strlen(aaSeq); if (len < 100) goto skip; lenDouble = (double)len; for (j=0; j<MAXRES; j++) { aaResCnt[j] = 0; } chp = aaSeq; for (i=0; i<len; i++) { aaResFound = 0; for (j=0; j<MAXRES; j++) { if (*chp == aaAlphabet[j]) { aaResFound = 1; aaResCnt[j] ++; } } if (!aaResFound) { fprintf(stderr, "%c %d not a valid AA residue.\n", *chp, *chp); } chp++; } for (j=0; j<MAXRES; j++) { freq[icnt][j] = (double)aaResCnt[j]/lenDouble; sumJ[j] = sumJ[j] + freq[icnt][j]; } for (j=0; j<20; j++) { fprintf(fh[j], "%15.7f\t%s\n", freq[icnt][j], accession); fflush(fh[j]); } icnt++; if (icnt >= MAXN) errAbort("Too many proteins - please set MAXN to be more than %d\n", MAXN); skip: row2 = sqlNextRow(sr2); } recordCnt = icnt; recordCntDouble = (double)recordCnt; for (j=0; j<20; j++) { carefulClose(&(fh[j])); } sqlFreeResult(&sr2); hFreeConn(&conn); hFreeConn(&conn2); for (j=0; j<MAXRES; j++) { avg[j] = sumJ[j]/recordCntDouble; } for (j=0; j<20; j++) { sum = 0.0; for (i=0; i<recordCnt; i++) { sum = sum + (freq[i][j] - avg[j]) * (freq[i][j] - avg[j]); } sigma[j] = sqrt(sum/(double)(recordCnt-1)); fprintf(o2, "%c\t%f\t%f\n", aaAlphabet[j], avg[j], sigma[j]); } carefulClose(&o2); o1 = mustOpen("pbAnomLimit.tab", "w"); for (j=0; j<20; j++) { safef(temp_str, sizeof(temp_str), "cat %c.txt|sort|uniq > %c.srt", aaAlphabet[j], aaAlphabet[j]); mustSystem(temp_str); /* figure out how many unique entries */ safef(temp_str, sizeof(temp_str), "wc %c.srt > %c.tmp", aaAlphabet[j], aaAlphabet[j]); mustSystem(temp_str); safef(temp_str, sizeof(temp_str), "%c.tmp", aaAlphabet[j]); o3 = mustOpen(temp_str, "r"); mustGetLine(o3, temp_str, 1000); chp = temp_str; while (*chp == ' ') chp++; while (*chp != ' ') chp++; *chp = '\0'; sscanf(temp_str, "%d", &sortedCnt); safef(temp_str, sizeof(temp_str), "rm %c.tmp", aaAlphabet[j]); mustSystem(temp_str); /* cal hi and low cutoff threshold */ ilow = 
(int)((float)sortedCnt * 0.025); ihi = (int)((float)sortedCnt * 0.975); safef(temp_str, sizeof(temp_str), "%c.srt", aaAlphabet[j]); o2 = mustOpen(temp_str, "r"); i=0; for (i=0; i<ilow; i++) { mustGetLine(o2, temp_str, 1000); } sscanf(temp_str, "%f", &fvalue1); mustGetLine(o2, temp_str, 1000); sscanf(temp_str, "%f", &fvalue2); p1 = (fvalue1 + fvalue2)/2.0; for (i=ilow+1; i<ihi; i++) { mustGetLine(o2, temp_str, 1000); } sscanf(temp_str, "%f", &fvalue1); mustGetLine(o2, temp_str, 1000); sscanf(temp_str, "%f", &fvalue2); p2 = (fvalue1 + fvalue2)/2.0; carefulClose(&o2); fprintf(o1, "%c\t%f\t%f\n", aaAlphabet[j], p1, p2); fflush(stdout); for (i=0; i<recordCnt; i++) { measure[i] = freq[i][j]; } safef(temp_str, sizeof(temp_str), "pbAaDist%c.tab", aaAlphabet[j]); calDist(measure, recordCnt, 51, 0.0, 0.005, temp_str); } carefulClose(&o1); return(0); }
static void gsidPrintSeqRows(struct sqlConnection *conn, char *query,
    char *subjId, char *noneMsg)
/* Run query, which must yield (sequence id, sequence) rows, and print each
 * row as a named anchor followed by a <pre> block holding ">seqId:subjId"
 * and the sequence wrapped at 50 characters per line.  noneMsg is printed
 * instead when the query returns no rows. */
{
struct sqlResult *sr = sqlMustGetResult(conn, query);
char **row = sqlNextRow(sr);
char *seq, *seqId;
char *chp;
int i, l;

if (row == NULL)
    printf("%s", noneMsg);

while (row != NULL)
    {
    seqId = row[0];
    seq   = row[1];
    l = strlen(seq);

    hPrintf("<A NAME=\"%s\">\n", seqId);
    hPrintf("<pre>\n");
    hPrintf("%c%s", '>', seqId);
    hPrintf("%s%s", ":", subjId);

    /* A newline before every 50th character, including the first, so the
     * sequence starts on its own line below the header. */
    chp = seq;
    for (i=0; i<l; i++)
        {
        if ((i%50) == 0) hPrintf("\n");
        hPrintf("%c", *chp);
        chp++;
        }
    hPrintf("</pre>");
    fflush(stdout);

    row = sqlNextRow(sr);
    }
sqlFreeResult(&sr);
}

static void sequencePrint(struct section *section,
    struct sqlConnection *conn, char *subjId)
/* Print out Sequence section: the DNA sequences of subjId followed by its
 * protein sequences, both looked up through gisaidXref.  The section
 * argument is not used. */
{
char query[256];

printf("<B>DNA Sequences</B><BR>");
sqlSafef(query, sizeof(query),
    "select dnaSeqId, seq from gisaidXref, dnaSeq where subjId = '%s' and id = dnaSeqId order by dnaSeqId", subjId);
gsidPrintSeqRows(conn, query, subjId, "<BR>Not available.<BR><BR>");

printf("<B>Protein Sequences</B><BR>");
sqlSafef(query, sizeof(query),
    "select aaSeqId, seq from gisaidXref, aaSeq where subjId = '%s' and aaSeqId = id order by aaSeqId", subjId);
gsidPrintSeqRows(conn, query, subjId, "<BR>Not available.<BR>");
return;
}
int main(int argc, char *argv[])
/* Build spXref2.tab from the biosql<date> database.
 *   argv[1] - date suffix appended to "biosql" to form the database name
 * For every bioentry row, each of its bioentry_direct_links rows is followed
 * to the matching dbxref row and one line is written holding: accession,
 * display_id, division, dbxref columns 1 and 2 (presumably database name and
 * external accession), bioentry_id and biodatabase_id.  The lines are then
 * sorted and de-duplicated into spXref2.tab.
 * Returns 0 on success, 1 on any failure. */
{
struct sqlConnection *conn, *conn2, *conn3;
char query[256], query2[256], query3[256];
struct sqlResult *sr, *sr2, *sr3;
char **row, **row2, **row3;
FILE *o1;
char *proteinDataDate;
char *dbxref_id;
char *bioentry_id;
char *biodatabase_id;
char *display_id;
char *accession;
char *division;
char *dbname;
char *extAC;
int n;

if (argc != 2) usage();

proteinDataDate = argv[1];

o1 = fopen("temp_spXref2.dat", "w");
if (o1 == NULL)
    {
    fprintf(stderr, "ERROR: Can not open output file: temp_spXref2.dat\n");
    return(1);
    }

conn  = hAllocConn();
conn2 = hAllocConn();
conn3 = hAllocConn();

/* Queries are built from command-line and database values, so they are
 * formatted with a bound and rejected when they do not fit (the original
 * used unbounded sprintf into 256-byte buffers). */
n = snprintf(query2, sizeof(query2), "select * from biosql%s.bioentry;", proteinDataDate);
if (n < 0 || n >= (int)sizeof(query2))
    {
    fprintf(stderr, "ERROR: bioentry query does not fit its buffer\n");
    return(1);
    }

sr2 = sqlMustGetResult(conn2, query2);
while ((row2 = sqlNextRow(sr2)) != NULL)
    {
    bioentry_id    = row2[0];
    biodatabase_id = row2[1];
    display_id     = row2[2];
    accession      = row2[3];
    division       = row2[5];

    n = snprintf(query, sizeof(query),
                 "select * from biosql%s.bioentry_direct_links where source_bioentry_id='%s';",
                 proteinDataDate, bioentry_id);
    if (n < 0 || n >= (int)sizeof(query))
        {
        fprintf(stderr, "ERROR: direct links query does not fit its buffer\n");
        return(1);
        }

    sr = sqlMustGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        dbxref_id = row[2];

        n = snprintf(query3, sizeof(query3),
                     "select * from biosql%s.dbxref where dbxref_id=%s;",
                     proteinDataDate, dbxref_id);
        if (n < 0 || n >= (int)sizeof(query3))
            {
            fprintf(stderr, "ERROR: dbxref query does not fit its buffer\n");
            return(1);
            }

        sr3 = sqlMustGetResult(conn3, query3);
        row3 = sqlNextRow(sr3);
        if (row3 == NULL)
            {
            /* A link pointing at a missing dbxref row used to crash on a
             * NULL row; report it and move on. */
            fprintf(stderr, "WARNING: no dbxref row for dbxref_id %s (bioentry %s), skipped\n",
                    dbxref_id, bioentry_id);
            }
        else
            {
            dbname = row3[1];
            extAC  = row3[2];
            fprintf(o1, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                    accession, display_id, division, dbname, extAC,
                    bioentry_id, biodatabase_id);
            }
        sqlFreeResult(&sr3);
        }
    sqlFreeResult(&sr);
    }

fclose(o1);
sqlFreeResult(&sr2);
hFreeConn(&conn);
hFreeConn(&conn2);
hFreeConn(&conn3);

/* Do not delete the raw data unless the sorted table was really produced. */
if (system("cat temp_spXref2.dat | sort |uniq > spXref2.tab") != 0)
    {
    fprintf(stderr, "ERROR: sorting temp_spXref2.dat into spXref2.tab failed\n");
    return(1);
    }
if (remove("temp_spXref2.dat") != 0)
    {
    fprintf(stderr, "ERROR: can not remove temp_spXref2.dat\n");
    return(1);
    }
return(0);
}
int main(int argc, char *argv[])
/* Build keggPathway.tab and keggMapDesc.tab.
 *   argv[1] - genome database name; the tables locus2Ref0, locus2Acc0 and
 *             keggList are read from "<db>Temp", knownGene from <db>
 * For every locus in locus2Ref0, each mRNA accession of that locus (version
 * suffix removed) that is a knownGene name is paired with all keggList rows
 * of the locus.  Writes (kgID, locusID, mapID) and (mapID, description)
 * lines, each sorted and de-duplicated into the final tables.
 * Returns 0 on success, 1 on any failure detected here. */
{
struct sqlConnection *conn, *conn2, *conn3;
char query[256], query2[256], query3[256];
struct sqlResult *sr, *sr2, *sr3;
char **row, **row2, **row3;
char *chp;
FILE *o1, *o2;
char *locusID;          /* LocusLink ID */
char *gbAC;             /* GenBank accession.version */
char *locusID2;         /* LocusLink ID */
char *dbName;
char cond_str[200];
char *kgID;
char *mapID;
char *desc;
int n;

if (argc != 2) usage();
dbName = argv[1];

conn  = hAllocConn(dbName);
conn2 = hAllocConn(dbName);
conn3 = hAllocConn(dbName);

o1 = fopen("j.dat",  "w");
o2 = fopen("jj.dat", "w");
if (o1 == NULL || o2 == NULL)
    {
    fprintf(stderr, "ERROR: Can not open output files j.dat / jj.dat\n");
    return(1);
    }

/* Queries are formatted with a bound and rejected when they do not fit;
 * the original used unbounded sprintf into fixed buffers. */
n = snprintf(query2, sizeof(query2), "select * from %sTemp.locus2Ref0;", dbName);
if (n < 0 || n >= (int)sizeof(query2))
    {
    fprintf(stderr, "ERROR: locus2Ref0 query does not fit its buffer\n");
    return(1);
    }

sr2 = sqlMustGetResult(conn2, query2);
while ((row2 = sqlNextRow(sr2)) != NULL)
    {
    locusID2 = row2[0];

    n = snprintf(query, sizeof(query),
                 "select * from %sTemp.locus2Acc0 where locusID=%s and seqType='m';",
                 dbName, locusID2);
    if (n < 0 || n >= (int)sizeof(query))
        {
        fprintf(stderr, "ERROR: locus2Acc0 query does not fit its buffer\n");
        return(1);
        }

    sr = sqlMustGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        locusID = row[0];
        gbAC    = row[1];

        /* knownGene names carry no version suffix: cut at the first '.'. */
        chp = strstr(gbAC, ".");
        if (chp != NULL) *chp = '\0';

        n = snprintf(cond_str, sizeof(cond_str), "name='%s'", gbAC);
        if (n < 0 || n >= (int)sizeof(cond_str))
            {
            fprintf(stderr, "ERROR: knownGene condition does not fit its buffer\n");
            return(1);
            }
        kgID = sqlGetField(dbName, "knownGene", "name", cond_str);
        if (kgID != NULL)
            {
            n = snprintf(query3, sizeof(query3),
                         "select * from %sTemp.keggList where locusID = '%s'",
                         dbName, locusID);
            if (n < 0 || n >= (int)sizeof(query3))
                {
                fprintf(stderr, "ERROR: keggList query does not fit its buffer\n");
                return(1);
                }

            sr3 = sqlGetResult(conn3, query3);
            /* The loop condition is the only place the row advances.  The
             * original fetched a second row at the end of the body, which
             * silently dropped every other pathway of a locus. */
            while ((row3 = sqlNextRow(sr3)) != NULL)
                {
                mapID = row3[1];
                desc  = row3[2];
                fprintf(o1, "%s\t%s\t%s\n", kgID, locusID, mapID);
                fprintf(o2, "%s\t%s\n", mapID, desc);
                }
            sqlFreeResult(&sr3);
            }
        }
    /* Release the per-locus result; it used to be leaked on every pass. */
    sqlFreeResult(&sr);
    }
sqlFreeResult(&sr2);

fclose(o1);
fclose(o2);
hFreeConn(&conn);
hFreeConn(&conn2);
hFreeConn(&conn3);

mustSystem("cat j.dat|sort|uniq >keggPathway.tab");
mustSystem("cat jj.dat|sort|uniq >keggMapDesc.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
return(0);
}