void fixHarbisonMotifs(char *database) /* fixHarbisonMotifs - Trim motifs that have beginning or ending columns that * are degenerate.. */ { char *motifTable = "transRegCodeMotif"; char *siteTable = "transRegCode"; struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[512], **row; struct motifSize *msList = NULL, *ms; struct hash *msHash = newHash(16); boolean anyMinNotMax = FALSE; boolean anyMissingMotif = FALSE; boolean anyMotifNotFound = FALSE; struct dnaMotif *motif; /* Stream through site table collecting data about motif sizes. */ safef(query, sizeof(query), "select name,chromEnd-chromStart from %s", siteTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; int size = atoi(row[1]); ms = hashFindVal(msHash, name); if (ms == NULL) { AllocVar(ms); hashAddSaveName(msHash, name, ms, &ms->name); ms->minSize = ms->maxSize = size; slAddHead(&msList, ms); } else { if (size < ms->minSize) ms->minSize = size; if (size > ms->maxSize) ms->maxSize = size; } } sqlFreeResult(&sr); /* Go through and report if minSize != maxSize. */ for (ms = msList; ms != NULL; ms = ms->next) { if (ms->minSize != ms->maxSize) { anyMinNotMax = TRUE; warn("%s size inconsistent: min %d, max %d", ms->name, ms->minSize, ms->maxSize); } } if (!anyMinNotMax) warn("All sizes agree in %s", siteTable); /* Stream through motifs and add to msList. */ safef(query, sizeof(query), "select * from %s", motifTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { motif = dnaMotifLoad(row); ms = hashFindVal(msHash, motif->name); if (ms == NULL) { anyMissingMotif = TRUE; warn("Motif %s is in %s but not %s", motif->name, motifTable, siteTable); } else { ms->motif = motif; } } sqlFreeResult(&sr); if (!anyMissingMotif) warn("All motifs in %s are also in %s", motifTable, siteTable); /* Make sure that all items in msList have a motif. 
*/ for (ms = msList; ms != NULL; ms = ms->next) { if (ms->motif == NULL) { anyMotifNotFound = TRUE; warn("Motif %s is in %s but not %s", ms->name, siteTable, motifTable); } } if (!anyMotifNotFound) warn("All motifs in %s are also in %s", siteTable, motifTable); /* Loop through table and deal with motifs that have different number * of columns in motif and site tables. */ for (ms = msList; ms != NULL; ms = ms->next) { motif = ms->motif; if (motif != NULL && ms->minSize == ms->maxSize) { if (motif->columnCount != ms->minSize) { warn("Motif %s has %d columns in %s but %d columns in %s", ms->name, ms->minSize, siteTable, motif->columnCount, motifTable); fixMotif(motif, ms->minSize, motifTable, conn); } } } sqlDisconnect(&conn); }
void hgGnfMicroarray(char *expTable, char *dataTable, char *atlasFile) /** Main function that does all the work for new-style*/ { struct lineFile *lf = lineFileOpen(atlasFile, TRUE); char *line; int i, wordCount, expCount; char **row; float *data; char *affyId; struct hash *hash = newHash(17); FILE *f = NULL; int dataCount = 0; /* Open Atlas file and use first line to create experiment table. */ if (!lineFileNextReal(lf, &line)) errAbort("%s is empty", lf->fileName); if (startsWith("Affy", line)) line += 4; if (startsWith("Gene Name", line)) line += 9; if (line[0] != '\t') errAbort("%s doesn't seem to be a new format atlas file", lf->fileName); expCount = lineToExpTable(line+1, expTable); if (expCount <= 0) errAbort("No experiments in %s it seems", lf->fileName); warn("%d experiments\n", expCount); f = hgCreateTabFile(tabDir, dataTable); AllocArray(row, expCount); AllocArray(data, expCount); while (lineFileNextReal(lf, &line)) { affyId = nextWord(&line); wordCount = chopByWhite(line, row, expCount); if (wordCount != expCount) errAbort("Expecting %d data points, got %d line %d of %s", expCount, wordCount, lf->lineIx, lf->fileName); if (chopName != NULL) { char *e = stringIn(chopName, affyId); if (e != NULL) *e = 0; } if (hashLookup(hash, affyId)) { warn("Duplicate %s, skipping all but first.", affyId); continue; } for (i=0; i<expCount; ++i) { data[i] = sqlFloat(row[i]); } shortDataOut(f, affyId, expCount, data); ++dataCount; if (limit != 0 && dataCount >= limit) break; } lineFileClose(&lf); if (doLoad) { struct sqlConnection *conn = sqlConnect(database); expDataCreateTable(conn, dataTable); hgLoadTabFile(conn, tabDir, dataTable, &f); hgRemoveTabFile(tabDir, dataTable); sqlDisconnect(&conn); } }
void addFile(char *fileName) /* Add all the words in file to database. */ { struct lineFile *lf = lineFileOpen(fileName, FALSE); char *line; int lineSize; char wordBuf[1024]; int lineBuf[512]; int highestWordId = 0; int wordCount; char *nullPt = NULL; struct hash *wordHash = newHash(16); struct hashEl *hel; struct dyString *query = newDyString(512); struct sqlConnection *conn = sqlConnect(database); int i; int lineCount = 0; while (lineFileNext(lf, &line, &lineSize)) { /* Chop the line up into words. */ int wordCount = 0; int startWord; int wordSize; int sizeLeft; int wordId; int i; for (sizeLeft = lineSize; sizeLeft > 0; sizeLeft -= wordSize, line += wordSize) { wordSize = tokSize(line, sizeLeft); if (wordSize >= sizeof(wordBuf)) errAbort("Word too long line %d of %s", lf->lineIx, lf->fileName); memcpy(wordBuf, line, wordSize); wordBuf[wordSize] = 0; if (wordBuf[wordSize-1] == ' ') wordBuf[wordSize-1] = '_'; if ((hel = hashLookup(wordHash, wordBuf)) == NULL) { wordId = highestWordId++; hel = hashAdd(wordHash, wordBuf, nullPt + wordId); dyStringClear(query); if (wordBuf[0] == '\'') { dyStringPrintf(query, "INSERT into word values (%d, \"'\")", wordId); } else if (wordBuf[0] == '\\') { dyStringPrintf(query, "INSERT into word values (%d, '\\\\')", wordId); } else { dyStringPrintf(query, "INSERT into word values (%d, '%s')", wordId, wordBuf); } sqlGetResult(conn, query->string); } else { wordId = (char *)(hel->val) - nullPt; } if (wordCount >= ArraySize(lineBuf)) { errAbort("Too many words in line %d of %s", lf->lineIx, lf->fileName); } lineBuf[wordCount++] = wordId; } /* Store the words in the database */ dyStringClear(query); dyStringPrintf(query, "INSERT into commaLine values (%d, %d, '", lineCount, wordCount); for (i=0; i<wordCount; ++i) dyStringPrintf(query, "%d,", lineBuf[i]); dyStringAppend(query, "')"); sqlGetResult(conn, query->string); dyStringClear(query); dyStringPrintf(query, "INSERT into lineSize values (%d,%d)", lineCount, wordCount); sqlGetResult(conn, 
query->string); for (i=0; i<wordCount; ++i) { dyStringClear(query); dyStringPrintf(query, "INSERT into lineWords values (%d,%d,%d)", lineCount, lineBuf[i], i); sqlGetResult(conn, query->string); } ++lineCount; } sqlDisconnect(&conn); }
static void domainsPrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print out protein domains. */ { char *db = sqlGetDatabase(conn); struct slName *el, *list; list = spExtDbAcc1List(spConn, swissProtAcc, "Interpro"); if (list != NULL) { char query[256], **row, **row2; struct sqlResult *sr, *sr2; hPrintf("<B>InterPro Domains: </B> "); hPrintf("<A HREF=\"http://www.ebi.ac.uk/interpro/protein/%s\" TARGET=_blank>", swissProtAcc); hPrintf("Graphical view of domain structure</A><BR>"); sqlSafef(query, sizeof(query), "select extAcc1,extAcc2 from extDbRef,extDb" " where extDbRef.acc = '%s'" " and extDb.val = 'Interpro' and extDb.id = extDbRef.extDb" , swissProtAcc); sr = sqlGetResult(spConn, query); while ((row = sqlNextRow(sr)) != NULL) { //hPrintf("<A HREF=\"http://www.ebi.ac.uk/interpro/entry/%s\" TARGET=_blank>", row[0]); //hPrintf("%s</A> - %s<BR>\n", row[0], row[1]); char interPro[256]; char *pdb = hPdbFromGdb(db); safef(interPro, 128, "%s.interProXref", pdb); if (hTableExists(db, interPro)) { sqlSafef(query, sizeof(query), "select description from %s where accession = '%s' and interProId = '%s'", interPro, swissProtAcc, row[0]); sr2 = sqlGetResult(conn, query); if ((row2 = sqlNextRow(sr2)) != NULL) { hPrintf("<A HREF=\"http://www.ebi.ac.uk/interpro/entry/%s\" TARGET=_blank>", row[0]); hPrintf("%s</A> - %s <BR>\n", row[0], row2[0]); } sqlFreeResult(&sr2); } else { hPrintf("<A HREF=\"http://www.ebi.ac.uk/interpro/entry/%s\" TARGET=_blank>", row[0]); hPrintf("%s</A> - %s<BR>\n", row[0], row[1]); } } hPrintf("<BR>\n"); slFreeList(&list); } if (kgVersion == KG_III) { /* Do Pfam domains here. 
*/ list = getPfamDomainList(conn, geneId); if (list != NULL) { hPrintf("<B>Pfam Domains:</B><BR>"); for (el = list; el != NULL; el = el->next) { char query[256]; char *description; sqlSafef(query, sizeof(query), "select description from pfamDesc where pfamAC='%s'", el->name); description = sqlQuickString(conn, query); if (description == NULL) description = cloneString("n/a"); hPrintf("<A HREF=\"http://pfam.xfam.org/family?acc=%s\" TARGET=_blank>", el->name); hPrintf("%s</A> - %s<BR>\n", el->name, description); freez(&description); } slFreeList(&list); hPrintf("<BR>\n"); } /* Do SCOP domains here */ list = getDomainList(conn, geneId, "Scop"); if (list != NULL) { hPrintf("<B>SCOP Domains:</B><BR>"); for (el = list; el != NULL; el = el->next) { char query[256]; char *description; sqlSafef(query, sizeof(query), "select description from scopDesc where acc='%s'", el->name); description = sqlQuickString(conn, query); if (description == NULL) description = cloneString("n/a"); hPrintf("<A HREF=\"http://scop.berkeley.edu/sunid=%s\" TARGET=_blank>", el->name); hPrintf("%s</A> - %s<BR>\n", el->name, description); freez(&description); } slFreeList(&list); hPrintf("<BR>\n"); } } else { list = spExtDbAcc1List(spConn, swissProtAcc, "Pfam"); if (list != NULL) { char *pfamDescSql = genomeSetting("pfamDescSql"); hPrintf("<B>Pfam Domains:</B><BR>"); for (el = list; el != NULL; el = el->next) { char query[256]; char *description; sqlSafef(query, sizeof(query), pfamDescSql, el->name); description = sqlQuickString(conn, query); if (description == NULL) description = cloneString("n/a"); hPrintf("<A HREF=\"http://pfam.xfam.org/family?acc=%s\" TARGET=_blank>", el->name); hPrintf("%s</A> - %s<BR>\n", el->name, description); freez(&description); } slFreeList(&list); hPrintf("<BR>\n"); } } list = spExtDbAcc1List(spConn, swissProtAcc, "PDB"); if (list != NULL) { struct sqlConnection *conn2 = sqlConnect(db); char query[256], **row; struct sqlResult *sr; int column = 0, maxColumn=3, rowCount=0; 
hPrintf("<B>Protein Data Bank (PDB) 3-D Structure</B><BR>"); sqlSafef(query, sizeof(query), "select extAcc1,extAcc2 from extDbRef,extDb" " where extDbRef.acc = '%s'" " and extDb.val = 'PDB' and extDb.id = extDbRef.extDb" , swissProtAcc); sr = sqlGetResult(spConn, query); hPrintf("<TABLE><TR>\n"); while ((row = sqlNextRow(sr)) != NULL) { if (++column > maxColumn) { hPrintf("</TR><TR>"); column = 1; if (rowCount == 0) { hPrintf("<TD ALIGN=CENTER COLSPAN=4><I>To conserve bandwidth, only the images from the first %d structures are shown.</I>", maxColumn); hPrintf("</TR><TR>"); } ++rowCount; } hPrintf("<TD>"); hPrintf("<A HREF=\"http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId=%s\" TARGET=_blank>", row[0]); if (rowCount < 1) hPrintf("<IMG SRC=\"http://www.rcsb.org/pdb/images/%s_asym_r_250.jpg\"><BR>", row[0]); hPrintf("%s</A> - %s ", row[0], row[1]); // include links LS-SNP and to launch viewer in PDB chimera struct tempName chimerax; lsSnpPdbChimeraSnpAnn(conn, row[0], NULL, &chimerax); hPrintf(" <A HREF=\"%s\">Chimera</A>", chimerax.forHtml); if (lsSnpPdbHasPdb(conn2, row[0])) hPrintf(" <A HREF=\"%s\" TARGET=_blank>LS-SNP</A>", lsSnpPdbGetUrlPdbSnp(row[0], NULL)); hPrintf("</TD>\n"); } hPrintf("</TR></TABLE>\n"); hPrintf("<A href=\"../goldenPath/help/chimera.html\" TARGET=_blank>Chimera help</A>\n"); hPrintf("<BR><BR>\n"); slFreeList(&list); sqlDisconnect(&conn2); } /* Do modBase link. 
*/ { hPrintf("<B>ModBase Predicted Comparative 3D Structure on "); modBaseAnchor(swissProtAcc); hPrintf("%s", swissProtAcc); hPrintf("</A></B><BR>\n"); hPrintf("<TABLE><TR>"); hPrintf("<TD>"); modBaseAnchor(swissProtAcc); hPrintf("\n<IMG SRC=\"https://modbase.compbio.ucsf.edu/modbase-cgi/image/modbase.jpg?database_id=%s\"></A></TD>", swissProtAcc); hPrintf("<TD>"); modBaseAnchor(swissProtAcc); hPrintf("\n<IMG SRC=\"https://modbase.compbio.ucsf.edu/modbase-cgi/image/modbase.jpg?database_id=%s&axis=x°ree=90\"></A></TD>", swissProtAcc); hPrintf("<TD>"); modBaseAnchor(swissProtAcc); hPrintf("\n<IMG SRC=\"https://modbase.compbio.ucsf.edu/modbase-cgi/image/modbase.jpg?database_id=%s&axis=y°ree=90\"></A></TD>", swissProtAcc); hPrintf("</TR><TR>\n"); hPrintf("<TD ALIGN=CENTER>Front</TD>"); hPrintf("<TD ALIGN=CENTER>Top</TD>"); hPrintf("<TD ALIGN=CENTER>Side</TD>"); hPrintf("</TR></TABLE>\n"); hPrintf("<I>The pictures above may be empty if there is no " "ModBase structure for the protein. The ModBase structure " "frequently covers just a fragment of the protein. You may " "be asked to log onto ModBase the first time you click on the " "pictures. It is simplest after logging in to just click on " "the picture again to get to the specific info on that model.</I>"); } }
void spDbAddVarSplice(char *database, char *inFile, char *outDir) /* spDbAddVarSplice - This adds information on the varient splices to the sp/uniProt database. */ { struct sqlConnection *conn = sqlConnect(database); char query[256]; makeDir(outDir); FILE *varProtein = openToWrite(outDir, "varProtein.txt"); FILE *varAcc = openToWrite(outDir, "varAcc.txt"); FILE *varDisplayId = openToWrite(outDir, "varDisplayId.txt"); FILE *varAccToTaxon = openToWrite(outDir, "varAccToTaxon.txt"); FILE *varDescription = openToWrite(outDir, "varDescription.txt"); FILE *varGene = openToWrite(outDir, "varGene.txt"); FILE *varGeneLogic = openToWrite(outDir, "varGeneLogic.txt"); struct lineFile *lf = lineFileOpen(inFile, TRUE); aaSeq seq; ZeroVar(&seq); while (faPepSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { char *row[4]; char *name = seq.name; if (startsWith("sp|", name)) // Skip over sp| introduced Aug 2009 name += 3; int rowSize = chopString(name, "-|", row, ArraySize(row)); if (rowSize != 3) errAbort("Expecting name to be in format accession-N|DISP_ID, got %s\n", name); char *acc = row[0]; char *version = row[1]; char *displayId = row[2]; int accLen = strlen(acc); int verLen = strlen(version); int displayIdLen = strlen(displayId); /* Do some tests. */ if ((accLen != 6 && accLen != 10) || isdigit(acc[0]) || !isdigit(acc[accLen-1])) errAbort("wierd accession %s before line %d of %s", acc, lf->lineIx, lf->fileName); if (!isdigit(version[0]) || verLen > 4) errAbort("wierd version %s before line %d of %s", version, lf->lineIx, lf->fileName); if (countChars(displayId, '_') != 1 || displayIdLen < 6 || displayIdLen > 16) errAbort("wierd displayId %s before line %d of %s", displayId, lf->lineIx, lf->fileName); if (accLen + 1 + verLen >= sizeof(SpAcc)) errAbort("Need to increase size of SpAcc in spDb.h because of %s-%s - need %d characters but only have %lu", acc, version, accLen + 1 + verLen, sizeof(SpAcc)); /* Print out parsed results. 
*/ fprintf(varAcc, "%s-%s\t%s\t%s\n", acc, version, acc, version); fprintf(varProtein, "%s-%s\t%s\n", acc, version, seq.dna); fprintf(varDisplayId, "%s-%s\t%s-%s\n", acc, version, acc, version); /* Look up taxon of base protein and use it to write to varAccToTaxon table. */ int taxon = spTaxon(conn, acc); fprintf(varAccToTaxon, "%s-%s\t%d\n", acc, version, taxon); /*Transfer description. */ char *description = spDescription(conn, acc); fprintf(varDescription, "%s-%s\t%s\n", acc, version, description); freez(&description); /* Transfer gene logic. */ sqlSafef(query, sizeof(query), "select val from geneLogic where acc = '%s'", acc); char *geneLogic = sqlQuickString(conn, query); if (geneLogic != NULL) fprintf(varGeneLogic, "%s-%s\t%s\n", acc, version, geneLogic); freez(&geneLogic); /* Transfer genes. */ struct slName *gene, *geneList = spGenes(conn, acc); for (gene = geneList; gene != NULL; gene = gene->next) fprintf(varGene, "%s-%s\t%s\n", acc, version, gene->name); slFreeList(&geneList); } carefulClose(&varAcc); carefulClose(&varProtein); carefulClose(&varDisplayId); carefulClose(&varAccToTaxon); carefulClose(&varDescription); carefulClose(&varGene); carefulClose(&varGeneLogic); sqlDisconnect(&conn); }
void expFind(char *assembly, char *file) /* Find experiments in metaDb and output .ra file */ { verbose(1, "Finding experiments in %s:%s\n", assembly, mdb); struct sqlConnection *connMeta; struct mdbObj *meta = NULL, *metas = NULL; struct encodeExp *exp = NULL, *exps = NULL; struct hash *oldExps, *newExps; char *key; int expNum = 0; FILE *f = mustOpen(file, "w"); /* create hash of keys for existing experiments so we can distinguish new ones */ oldExps = expKeyHashFromTable(connExp, table); newExps = hashNew(0); /* read mdb objects from database */ connMeta = sqlConnect(assembly); metas = mdbObjsQueryAll(connMeta, mdb); verbose(2, "Found %d objects\n", slCount(metas)); /* order so that oldest have lowest ids */ mdbObjsSortOnVars(&metas, "dateSubmitted lab dataType cell"); /* create new experiments */ while ((meta = slPopHead(&metas)) != NULL) { if (!mdbObjIsEncode(meta)) continue; if (composite != NULL && !mdbObjInComposite(meta, composite)) continue; exp = encodeExpFromMdb(connMeta,assembly,meta); if (exp == NULL) continue; key = encodeExpKey(exp); verbose(3, "key: %s\n", key); if (hashLookup(newExps, key) == NULL && hashLookup(oldExps, key) == NULL) { verbose(2, "Found new experiment - Date: %s Experiment %d: %s\n", mdbObjFindValue(meta, "dateSubmitted"), ++expNum, key); /* save new experiment */ hashAdd(newExps, key, NULL); slAddHead(&exps, exp); } /* Skip other metas belonging to the same exp by: struct mdbVar *edvs = mdbObjFindEncodeEdvs(connMeta,meta); // Can't use encodeExpVars(exp) because of "None" issues assert(edvs != NULL); char *expVars = slPairListToString(edvs,FALSE); // don't bother with quoting since edvs should not have spaces struct mdbObj *mdbExpObjs = mdbObjsFilterByVars(&metas,expVars,TRUE,TRUE); freeMem(expVars); mdbVarsFree(&edvs); // If you want to do this, then encodeExpFromMdb() above should be replaced with encodeExpFromMdbVars() mdbObjFree(&mdbExpObjs); // Filtering destroyed sort order // NOTE: Given the re-sort, this may not prove much 
more efficient mdbObjsSortOnVars(&metas, "dateSubmitted lab dataType cell"); */ } /* write out experiments in .ra format */ slReverse(&exps); while ((exp = slPopHead(&exps)) != NULL) { exp->organism = organism; encodeExpToRaFile(exp, f); } carefulClose(&f); sqlDisconnect(&connMeta); }
void hgLoadMafSummary(char *db, char *table, char *fileName) /* hgLoadMafSummary - Load a summary table of pairs in a maf into a database. */ { long mafCount = 0, allMafCount = 0; struct mafComp *mcMaster = NULL; struct mafAli *maf; struct mafFile *mf = mafOpen(fileName); struct sqlConnection *conn; FILE *f = hgCreateTabFile(".", table); long componentCount = 0; struct hash *componentHash = newHash(0); if (!test) { conn = sqlConnect(database); mafSummaryTableCreate(conn, table, hGetMinIndexLength(db)); } verbose(1, "Indexing and tabulating %s\n", fileName); /* process mafs */ while ((maf = mafNext(mf)) != NULL) { mcMaster = mafMaster(maf, mf, fileName); allMafCount++; if (mcMaster->srcSize < minSeqSize) continue; while (mcMaster->size > maxSize) { /* break maf into maxSize pieces */ int end = mcMaster->start + maxSize; struct mafAli *subMaf = mafSubset(maf, mcMaster->src, mcMaster->start, end); verbose(3, "Splitting maf %s:%d len %d\n", mcMaster->src, mcMaster->start, mcMaster->size); componentCount += processMaf(subMaf, componentHash, f, mf, fileName); mafAliFree(&subMaf); subMaf = mafSubset(maf, mcMaster->src, end, end + (mcMaster->size - maxSize)); mafAliFree(&maf); maf = subMaf; mcMaster = mafMaster(maf, mf, fileName); } if (mcMaster->size != 0) { /* remainder of maf after splitting off maxSize submafs */ componentCount += processMaf(maf, componentHash, f, mf, fileName); } mafAliFree(&maf); mafCount++; } mafFileFree(&mf); flushSummaryBlocks(componentHash, f); verbose(1, "Created %ld summary blocks from %ld components and %ld mafs from %s\n", summaryCount, componentCount, allMafCount, fileName); if (test) return; verbose(1, "Loading into %s table %s...\n", database, table); hgLoadTabFile(conn, ".", table, &f); verbose(1, "Loading complete"); hgEndUpdate(&conn, "Add %ld maf summary blocks from %s\n", summaryCount, fileName); }
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList) /* Load database from bedList. */ { struct sqlConnection *conn; struct dyString *dy = newDyString(1024); char *tab = (char *)NULL; int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0); if ( ! noLoad ) conn = sqlConnect(database); if ((char *)NULL != tmpDir) tab = cloneString(rTempName(tmpDir,"loadBed",".tab")); else tab = cloneString("bed.tab"); if (bedDetail && sqlTable == NULL) errAbort("bedDetail format requires sqlTable option"); if (bedDetail && !strictTab) errAbort("bedDetail format must be tab separated"); if (bedDetail && !noBin) noBin = TRUE; /* First make table definition. */ if (sqlTable != NULL && !oldTable) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop off end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; if ( !noLoad ) { if (renameSqlTable) { char *pos = stringIn("CREATE TABLE ", sql); if (pos == NULL) errAbort("Can't find CREATE TABLE in %s\n", sqlTable); char *oldSql = cloneString(sql); nextWord(&pos); nextWord(&pos); char *tableName = nextWord(&pos); sql = replaceChars(oldSql, tableName, track); } verbose(1, "Creating table definition for %s\n", track); sqlRemakeTable(conn, track, sql); if (!noBin) addBinToEmptyTable(conn, track); adjustSqlTableColumns(conn, track, bedSize); } freez(&sql); } else if (!oldTable) { int minLength; if (noLoad) minLength=6; else if (maxChromNameLength) minLength = maxChromNameLength; else minLength = hGetMinIndexLength(database); verbose(2, "INDEX chrom length: %d\n", minLength); /* Create definition statement. 
*/ verbose(1, "Creating table definition for %s\n", track); dyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " chrom varchar(255) not null,\n"); dyStringAppend(dy, " chromStart int unsigned not null,\n"); dyStringAppend(dy, " chromEnd int unsigned not null,\n"); if (bedSize >= 4) maybeBedGraph(4, dy, " name varchar(255) not null,\n"); if (bedSize >= 5) { if (allowNegativeScores) maybeBedGraph(5, dy, " score int not null,\n"); else maybeBedGraph(5, dy, " score int unsigned not null,\n"); } if (bedSize >= 6) maybeBedGraph(6, dy, " strand char(1) not null,\n"); if (bedSize >= 7) maybeBedGraph(7, dy, " thickStart int unsigned not null,\n"); if (bedSize >= 8) maybeBedGraph(8, dy, " thickEnd int unsigned not null,\n"); /* As of 2004-11-22 the reserved field is used as itemRgb in code */ if (bedSize >= 9) maybeBedGraph(9, dy, " reserved int unsigned not null,\n"); if (bedSize >= 10) maybeBedGraph(10, dy, " blockCount int unsigned not null,\n"); if (bedSize >= 11) maybeBedGraph(11, dy, " blockSizes longblob not null,\n"); if (bedSize >= 12) maybeBedGraph(12, dy, " chromStarts longblob not null,\n"); if (bedSize >= 13) maybeBedGraph(13, dy, " expCount int unsigned not null,\n"); if (bedSize >= 14) maybeBedGraph(14, dy, " expIds longblob not null,\n"); if (bedSize >= 15) maybeBedGraph(15, dy, " expScores longblob not null,\n"); dyStringAppend(dy, "#Indices\n"); if (nameIx && (bedSize >= 4) && (0 == bedGraph)) dyStringAppend(dy, " INDEX(name(16)),\n"); if (noBin) { dyStringPrintf(dy, " INDEX(chrom(%d),chromStart)\n", minLength); } else { dyStringPrintf(dy, " INDEX(chrom(%d),bin)\n", minLength); } dyStringAppend(dy, ")\n"); if (noLoad) verbose(2,"%s", dy->string); else sqlRemakeTable(conn, track, dy->string); } verbose(1, "Saving %s\n", tab); writeBedTab(tab, bedList, bedSize); if ( ! 
noLoad ) { verbose(1, "Loading %s\n", database); if (customTrackLoader) sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN); else sqlLoadTabFile(conn, tab, track, loadOptions); if (! noHistory) { char comment[256]; /* add a comment to the history table and finish up connection */ safef(comment, sizeof(comment), "Add %d element(s) from bed list to %s table", slCount(bedList), track); hgHistoryComment(conn, comment); } if(fillInScoreColumn != NULL) { char query[500]; char buf[500]; struct sqlResult *sr; safef(query, sizeof(query), "select sum(score) from %s", track); if(sqlQuickQuery(conn, query, buf, sizeof(buf))) { unsigned sum = sqlUnsigned(buf); if (!sum) { safef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track); if ((sr = sqlGetResult(conn, query)) != NULL) { char **row = sqlNextRow(sr); if(row != NULL) { float min = sqlFloat(row[0]); float max = sqlFloat(row[1]); if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present { if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]); sqlFreeResult(&sr); // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000 float a = (1000-minScore) / (max - min); float b = 1000 - ((1000-minScore) * max) / (max - min); safef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)", track, a, fillInScoreColumn, b); int changed = sqlUpdateRows(conn, query, NULL); verbose(2, "update query: %s; changed: %d\n", query, changed); } else { sqlFreeResult(&sr); verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn); } } } } } } sqlDisconnect(&conn); /* if temp dir specified, unlink file to make it disappear */ if ((char *)NULL != tmpDir) unlink(tab); } else verbose(1, "No load option selected, see file: %s\n", tab); } 
/* static void loadDatabase() */
void netClass(char *inName, char *tDb, char *qDb, char *outName) /* netClass - Add classification info to net. */ { struct chainNet *net; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct chrom *qChromList, *chrom; struct hash *qChromHash; struct hash *arHash = NULL; struct sqlConnection *tConn = sqlConnect(tDb); struct sqlConnection *qConn = sqlConnect(qDb); qLm = lmInit(0); if (!noAr) arHash = getAncientRepeats(tConn, qConn); getChroms(qConn, &qChromHash, &qChromList); verbose(1, "Reading gaps in %s\n", qDb); if (sqlTableExists(qConn, "gap")) { getSeqGapsUnsplit(qConn, qChromHash); } else { for (chrom = qChromList; chrom != NULL; chrom = chrom->next) chrom->nGaps = getSeqGaps(qConn, chrom->name); } if (qNewR) { verbose(1, "Reading new repeats from %s\n", qNewR); for (chrom = qChromList; chrom != NULL; chrom = chrom->next) chrom->newRepeats = getNewRepeats(qNewR, chrom->name); } verbose(1, "Reading simpleRepeats in %s\n", qDb); getTrfUnsplit(qConn, qChromHash); if (qRepeatTable) { verbose(1, "Reading repeats in %s from table %s\n", qDb, qRepeatTable); getRepeatsUnsplitTable(qConn, qChromHash, qRepeatTable); } else { verbose(1, "Reading repeats in %s\n", qDb); if (sqlTableExists(qConn, "rmsk")) getRepeatsUnsplit(qConn, qChromHash, arHash); else { for (chrom = qChromList; chrom != NULL; chrom = chrom->next) getRepeats(qConn, arHash, chrom->name, &chrom->repeats, &chrom->oldRepeats); } } while ((net = chainNetRead(lf)) != NULL) { struct rbTree *tN, *tRepeats, *tOldRepeats, *tTrf; char *tName = net->name; if (liftHashT != NULL) { struct liftSpec *lft = hashMustFindVal(liftHashT, net->name); tName = lft->newName; } verbose(1, "Processing %s.%s\n", tDb, net->name); tN = getSeqGaps(tConn, tName); tAddN(net, net->fillList, tN); rbTreeFree(&tN); qAddN(net, net->fillList, qChromHash); if (tRepeatTable) getRepeatsTable(tConn, tRepeatTable, tName, &tRepeats, &tOldRepeats); else getRepeats(tConn, arHash, tName, &tRepeats, &tOldRepeats); 
tAddR(net, net->fillList, tRepeats); if (!noAr) tAddOldR(net, net->fillList, tOldRepeats); rbTreeFree(&tRepeats); rbTreeFree(&tOldRepeats); qAddR(net, net->fillList, qChromHash); if (!noAr) qAddOldR(net, net->fillList, qChromHash); tTrf = getTrf(tConn, tName); tAddTrf(net, net->fillList, tTrf); rbTreeFree(&tTrf); qAddTrf(net, net->fillList, qChromHash); if (tNewR) { struct rbTree *tree = getNewRepeats(tNewR, tName); tAddNewR(net, net->fillList, tree); rbTreeFree(&tree); } if (qNewR) qAddNewR(net, net->fillList, qChromHash); chainNetWrite(net, f); chainNetFree(&net); } sqlDisconnect(&tConn); sqlDisconnect(&qConn); }
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable) /* hgExpDistance - Create table that measures expression distance between pairs. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[256]; char **row; struct hash *expHash = hashNew(16); int realExpCount = -1; struct microData *gene; int rc, t; pthread_t *threads = NULL; pthread_attr_t attr; int *threadID = NULL; void *status; char *tempDir = "."; int arrayNum; struct microDataDistance *geneDistPtr = NULL; struct microDataDistance *geneDistArray = NULL; int geneIx; FILE *f = NULL; /* Get list/hash of all items with expression values. */ safef(query, sizeof(query), "select name,expCount,expScores from %s", posTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; if (!hashLookup(expHash, name)) { int expCount = sqlUnsigned(row[1]); int commaCount; float *expScores = NULL; sqlFloatDynamicArray(row[2], &expScores, &commaCount); if (expCount != commaCount) errAbort("expCount and expScores don't match on %s in %s", name, posTable); if (realExpCount == -1) realExpCount = expCount; if (expCount != realExpCount) errAbort("In %s some rows have %d experiments others %d", name, expCount, realExpCount); AllocVar(gene); gene->expCount = expCount; gene->expScores = expScores; hashAddSaveName(expHash, name, gene, &gene->name); slAddHead(&geneList, gene); } } sqlFreeResult(&sr); conn = sqlConnect(database); slReverse(&geneList); geneCount = slCount(geneList); printf("Have %d elements in %s\n", geneCount, posTable); weights = getWeights(realExpCount); if (optionExists("lookup")) geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList); geneCount = slCount(geneList); printf("Got %d unique elements in %s\n", geneCount, posTable); sqlDisconnect(&conn); /* Disconnect because next step is slow. 
*/ if (geneCount < 1) errAbort("ERROR: unique gene count less than one ?"); f = hgCreateTabFile(tempDir, outTable); synQ = synQueueNew(); /* instantiate threads */ AllocArray( threadID, numThreads ); AllocArray( threads, numThreads ); pthread_attr_init( &attr ); pthread_mutex_init( &mutexDotOut, NULL ); pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); for (t = 0; t < numThreads; t++) { threadID[t] = t; rc = pthread_create( &threads[t], &attr, computeDistance, (void *) &threadID[t]); if (rc) errAbort("ERROR: in pthread_create() %d\n", rc ); } /* this thread will write to the file from the queue */ for (arrayNum = 0; arrayNum < geneCount; arrayNum++) { geneDistArray = (struct microDataDistance *)synQueueGet( synQ ); geneDistPtr = geneDistArray; /* Print out closest GENEDISTS distances in tab file. */ for (geneIx=0; geneIx < GENEDISTS && geneIx < geneCount; ++geneIx, geneDistPtr++) if (geneDistPtr != NULL) fprintf(f, "%s\t%s\t%f\n", geneDistPtr->name1, geneDistPtr->name2, geneDistPtr->distance); else errAbort("ERROR: writing distance %d to file\n", geneIx); freeMem( geneDistArray ); } /* synchronize all threads */ for (t = 0; t < numThreads; t++) { rc = pthread_join( threads[t], &status); if (rc) errAbort("ERROR: in pthread_join() %d\n", rc ); } printf("Made %s.tab\n", outTable); slFreeList( &geneList ); pthread_mutex_destroy( &mutexDotOut ); pthread_attr_destroy( &attr ); /* Create and load table. */ conn = sqlConnect(database); distanceTableCreate(conn, outTable); hgLoadTabFile(conn, tempDir, outTable, &f); printf("Loaded %s\n", outTable); /* Add indices. */ safef(query, sizeof(query), "alter table %s add index(query(12))", outTable); sqlUpdate(conn, query); printf("Made query index\n"); if (optionExists("targetIndex")) { safef(query, sizeof(query), "alter table %s add index(target(12))", outTable); sqlUpdate(conn, query); printf("Made target index\n"); } hgRemoveTabFile(tempDir, outTable); }
void hgDumpWiggle(int trackCount, char *tracks[]) /* hgDumpWiggle - dump wiggle data from database or .wig file */ { int i; struct wiggle *wiggle; for (i=0; i<trackCount; ++i) verbose(2, "#\ttrack: %s\n", tracks[i]); if (db) { struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; char query[256]; char *wibFile = NULL; FILE *f = (FILE *) NULL; unsigned char *ReadData; /* the bytes read in from the file */ int dataOffset = 0; /* within data block during reading */ int rowCount = 0; conn = sqlConnect(db); for (i=0; i<trackCount; ++i) { if (chr) sqlSafef(query, 256, "select * from %s where chrom = \"%s\"\n", tracks[i], chr); else sqlSafef(query, 256, "select * from %s\n", tracks[i]); verbose(2, "#\t%s\n", query); sr = sqlGetResult(conn,query); while ((row = sqlNextRow(sr)) != NULL) { ++rowCount; wiggle = wiggleLoad(row + 1); /* the +1 avoids the bin column*/ verbose(2, "#\trow: %d, start: %u, data range: %g: [%g:%g]\n", rowCount, wiggle->chromStart, wiggle->dataRange, wiggle->lowerLimit, wiggle->lowerLimit+wiggle->dataRange); verbose(2, "#\tresolution: %g per bin\n",wiggle->dataRange/(double)MAX_WIG_VALUE); if (wibFile) { if (differentString(wibFile,wiggle->file)) { if (f != (FILE *) NULL) { fclose(f); freeMem(wibFile); } wibFile = cloneString(wiggle->file); f = mustOpen(wibFile, "r"); } } else { wibFile = cloneString(wiggle->file); f = mustOpen(wibFile, "r"); } fseek(f, wiggle->offset, SEEK_SET); ReadData = (unsigned char *) needMem((size_t) (wiggle->count + 1)); fread(ReadData, (size_t) wiggle->count, (size_t) sizeof(unsigned char), f); verbose(2, "#\trow: %d, reading: %u bytes\n", rowCount, wiggle->count); for (dataOffset = 0; dataOffset < wiggle->count; ++dataOffset) { unsigned char datum = ReadData[dataOffset]; if (datum != WIG_NO_DATA) { double dataValue = wiggle->lowerLimit+(((double)datum/(double)MAX_WIG_VALUE)*wiggle->dataRange); printf("%d\t%g\n", 1 + wiggle->chromStart + (dataOffset * wiggle->span), dataValue); } } } } if (f != (FILE 
*) NULL) { fclose(f); } if (wibFile) freeMem(wibFile); sqlFreeResult(&sr); hFreeConn(&conn); } else { warn("ERROR: file option has not been implemented yet ..."); } } /* void hgDumpWiggle(int trackCount, char *tracks[]) */
void doMiddle(struct cart *theCart) /* Print the body of an html file. */ { char cond_str[255]; struct sqlConnection *conn; char *proteinAC; char *chp, *chp1, *chp9; char *debugTmp = NULL; char *chromStr, *cdsStartStr, *cdsEndStr, posStr[255]; char *supportedGenomeDatabase; char *answer; char *queryID; /* Initialize layout and database. */ cart = theCart; /* Uncomment this to see parameters for debugging. */ /* Be careful though, it breaks if custom track * is more than 4k */ /* { struct dyString *state = cgiUrlString(); hPrintf("State: %s\n", state->string); } */ queryID = cartOptionalString(cart, "proteinID"); if (sameString(queryID, "")) { errAbort("Please go back and enter a gene symbol or a Swiss-Prot/TrEMBL protein ID.\n"); } if (cgiVarExists("db")) { /* if db is known, get key variables set */ proteinInSupportedGenome = TRUE; database = cgiOptionalString("db"); organism = hDbOrganism(database); protDbName = hPdbFromGdb(database); proteinID = strdup(queryID); } else { protCntInSwissByGene = searchProteinsInSwissProtByGene(queryID); /* no CGI 'db' variable means it did not come in from GB but from pbGateway */ /* search existing GB databases to see if this protein can be found */ protCntInSupportedGenomeDb = searchProteinsInSupportedGenomes(queryID, &supportedGenomeDatabase); if ((protCntInSupportedGenomeDb > 1) || protCntInSwissByGene >= 1) { /* more than 1 proteins match the query ID, present selection web page */ proteinInSupportedGenome = 1; presentProteinSelections(queryID, protCntInSwissByGene, protCntInSupportedGenomeDb); return; } else { if (protCntInSupportedGenomeDb == 1) { /* one and only one protein found in a genome DB that support KG and PB */ proteinInSupportedGenome = TRUE; database = strdup(supportedGenomeDatabase); organism = hDbOrganism(database); protDbName = hPdbFromGdb(database); proteinID=strdup(queryID); } else { /* not found in genome DBs that support KG/PB */ /* now search PROTEOME_DB_NAMES to see if this protein is there. 
*/ answer = uniProtFindPrimAcc(queryID); if (answer == NULL) { if (hIsGsidServer()) { errAbort( "'%s' does not seem to be a valid protein ID.<br><br>Click <A HREF=\"../cgi-bin/gsidPbGateway\">here</A> to start another query." , queryID); } else { errAbort( "'%s' does not seem to be a valid UniProt(Swiss-Prot/TrEMBL) protein ID or a gene symbol.<br><br>Click <A HREF=\"../cgi-bin/pbGateway\">here</A> to start another query." , queryID); } } proteinInSupportedGenome = FALSE; database = strdup(GLOBAL_PB_DB); organism = strdup(""); protDbName = strdup(PROTEOME_DB_NAME); proteinID = strdup(answer); } } if (proteinInSupportedGenome) { spConn = sqlConnect(database); safef(cond_str, sizeof(cond_str), "alias='%s'", queryID); proteinID = sqlGetField(database, "kgSpAlias", "spID", cond_str); safef(cond_str, sizeof(cond_str), "spID='%s'", proteinID); answer = sqlGetField(database, "kgXref", "spDisplayID", cond_str); safef(cond_str, sizeof(cond_str), "proteinID='%s'", answer); chromStr = sqlGetField(database, "knownGene", "chrom", cond_str); if (chromStr) { cdsStartStr = sqlGetField(database, "knownGene", "cdsStart", cond_str); cdsEndStr = sqlGetField(database, "knownGene", "cdsEnd", cond_str); safef(posStr, sizeof(posStr), "%s:%s-%s", chromStr, cdsStartStr, cdsEndStr); positionStr = strdup(posStr); cartSetString(cart, "position", positionStr); cartSetString(cart, "organism", organism); } } } /* print out key variables for debugging */ /* printf("<br>before enter main section: <br>proteinInSupportedGenome=%d<br>proteinID=%s <br>database=%s <br>organism=%s <br>protDbName=%s\n", proteinInSupportedGenome, proteinID, database, organism, protDbName);fflush(stdout); */ if (hTableExists(database, "kgProtMap2")) { kgVersion = KG_III; strcpy(kgProtMapTableName, "kgProtMap2"); } debugTmp = cartUsualString(cart, "hgDebug", "off"); if(sameString(debugTmp, "on")) hgDebug = TRUE; else hgDebug = FALSE; conn = hAllocConn(database); hgsid = cartOptionalString(cart, "hgsid"); if (hgsid != NULL) { 
safef(hgsidStr, sizeof(hgsidStr), "&hgsid=%s", hgsid); } else { strcpy(hgsidStr, ""); } /* check proteinID to see if it is a valid SWISS-PROT/TrEMBL accession or display ID */ /* then assign the accession number to global variable proteinID */ safef(cond_str, sizeof(cond_str), "accession='%s'", proteinID); proteinAC = sqlGetField(protDbName, "spXref3", "accession", cond_str); if (proteinAC == NULL) { safef(cond_str, sizeof(cond_str), "displayID='%s'", proteinID); proteinAC = sqlGetField(protDbName, "spXref3", "accession", cond_str); if (proteinAC == NULL) { if (hIsGsidServer()) { safef(cond_str, sizeof(cond_str), "acc='%s'", proteinID); proteinAC = sqlGetField(protDbName, "uniProtAlias", "acc", cond_str); if (proteinAC != NULL) { protDisplayID = proteinID; proteinID = proteinAC; } else { errAbort("'%s' does not seem to be a valid protein ID.<br><br>Click <A HREF=\"../cgi-bin/pbGateway\">here</A> to start another query." , proteinID); } } else { errAbort("'%s' does not seem to be a valid Swiss-Prot/TrEMBL protein ID or gene symbol.<br><br>Click <A HREF=\"../cgi-bin/pbGateway\">here</A> to start another query." 
, proteinID); } } else { protDisplayID = proteinID; proteinID = proteinAC; } } else { safef(cond_str, sizeof(cond_str), "accession='%s'", proteinID); protDisplayID = sqlGetField(protDbName, "spXref3", "displayID", cond_str); } if (proteinInSupportedGenome) { if (kgVersion == KG_III) { safef(cond_str, sizeof(cond_str), "spId='%s'", proteinID); mrnaID = sqlGetField(database, "kgXref", "kgId", cond_str); } else { safef(cond_str, sizeof(cond_str), "proteinID='%s'", protDisplayID); mrnaID = sqlGetField(database, "knownGene", "name", cond_str); } } else { mrnaID = NULL; positionStr = NULL; } safef(cond_str, sizeof(cond_str), "accession='%s'", proteinID); description = sqlGetField(protDbName, "spXref3", "description", cond_str); if (positionStr != NULL) { chp = strstr(positionStr, ":"); *chp = '\0'; prevGBChrom = cloneString(positionStr); chp1 = chp + 1; chp9 = strstr(chp1, "-"); *chp9 = '\0'; prevGBStartPos = atoi(chp1); chp1 = chp9 + 1; prevGBEndPos = atoi(chp1); } else { prevGBChrom = NULL; prevGBStartPos = -1; prevGBEndPos = -1; } /* Do main display. */ if (cgiVarExists("pbt.psOutput")) handlePostscript(); else { doTrackForm(NULL, NULL); } }
void makeActiveImagePB(char *psOutput, char *psOutput2) /* Make image and image map. */ { char *mapName = "map"; int pixWidth, pixHeight; struct sqlConnection *conn; char query[256]; struct sqlResult *sr; char **row; int iypos; char *spDisplayId; char *oldDisplayId; conn = sqlConnect(UNIPROT_DB_NAME); printf("<BR>"); hPrintf("<BR><font size=4><B>Protein: "); hPrintf("%s</B>", proteinID); /* Please note the hiv database name is hard wired here.*/ safef(query, sizeof(query), "select subjId from hivVax003Vax004.gsIdXref where aaSeqId = '%s'", proteinID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { printf("<BR>"); hPrintf("<font size=4><B>Subject: "); hPrintf("<A HREF=\"../cgi-bin/gsidSubj?hgs_subj=%s&submit=Go!\">", row[0]); hPrintf("%s</A></B><BR>", row[0]); } sqlFreeResult(&sr); spDisplayId = spAccToId(conn, spFindAcc(conn, proteinID)); if (strstr(spDisplayId, spFindAcc(conn, proteinID)) == NULL) { hPrintf(" (aka %s", spDisplayId); /* show once if the new and old displayId are the same */ oldDisplayId = oldSpDisplayId(spDisplayId); if (oldDisplayId != NULL) { if (!sameWord(spDisplayId, oldDisplayId)) { hPrintf(" or %s", oldSpDisplayId(spDisplayId)); } } hPrintf(")\n"); } hPrintf("</font><br>"); protSeq = getAA(proteinID); if (protSeq == NULL) { errAbort("%s is not a current valid entry in UniProt(SWISS-PROT/TrEMBL)\n", proteinID); } protSeqLen = strlen(protSeq); fflush(stdout); iypos = 15; doTracks(proteinID, mrnaID, protSeq, &iypos, psOutput); if (!hTableExists(database, "pbStamp")) goto histDone; pbScale = 3; pixWidth = 520; insideWidth = pixWidth-gfxBorder; pixHeight = 350; if (psOutput2) { vg2 = vgOpenPostScript(pixWidth, pixHeight, psOutput2); } else { trashDirFile(&gifTn2, "pbt", "pbt", ".gif"); vg2 = vgOpenGif(pixWidth, pixHeight, gifTn2.forCgi, FALSE); } g_vg = vg2; pbRed = vgFindColorIx(vg2, 0xf9, 0x51, 0x59); pbBlue = vgFindColorIx(g_vg, 0x00, 0x00, 0xd0); normalColor = pbBlue; abnormalColor = pbRed; bkgColor = 
vgFindColorIx(vg2, 255, 254, 232); vgBox(vg2, 0, 0, insideWidth, pixHeight, bkgColor); /* Start up client side map. */ mapName=cloneString("pbStamps"); hPrintf("\n<MAP Name=%s>\n", mapName); vgSetClip(vg2, 0, gfxBorder, insideWidth, pixHeight - 2*gfxBorder); iypos = 15; /* Draw stamps. */ doStamps(proteinID, mrnaID, protSeq, vg2, &iypos); /* Finish map. */ hPrintf("</MAP>\n"); /* Save out picture and tell html file about it. */ vgClose(&vg2); hPrintf("<P>"); hPrintf("\n<IMG SRC=\"%s\" BORDER=1 WIDTH=%d HEIGHT=%d USEMAP=#%s><BR>", gifTn2.forCgi, pixWidth, pixHeight, mapName); if (proteinInSupportedGenome) { hPrintf("\n<A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml#histograms\" TARGET=_blank>"); } else { hPrintf("\n<A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbGsid/pbTracksHelp.shtml#histograms\" TARGET=_blank>"); } hPrintf("Explanation of Protein Property Histograms</A><BR>"); hPrintf("<P>"); histDone: hPrintf("<P>"); fflush(stdout); printFASTA(proteinID, protSeq); }
void hgTrackDb(char *org, char *database, char *trackDbName, char *sqlFile, char *hgRoot,
               boolean strict)
/* hgTrackDb - Create trackDb table from text files.
 *   org, database, hgRoot, strict - passed to buildTrackDb to assemble the
 *                                   track list
 *   trackDbName - table to (re)create; also the base name of the tab file
 *   sqlFile     - file holding the table definition
 * The regular fields are bulk-loaded from "<trackDbName>.tab"; the html and
 * settings fields are written afterwards, one row at a time. */
{
struct trackDb *td;
char tab[PATH_LEN];
safef(tab, sizeof(tab), "%s.tab", trackDbName);

struct trackDb *tdbList = buildTrackDb(org, database, hgRoot, strict);
tdbList = flatten(tdbList);
slSort(&tdbList, trackDbCmp);
verbose(1, "Loaded %d track descriptions total\n", slCount(tdbList));

/* Write to tab-separated file; hold off on html, since it must be encoded */
    {
    verbose(2, "Starting write of tabs to %s\n", tab);
    FILE *f = mustOpen(tab, "w");
    for (td = tdbList; td != NULL; td = td->next)
        {
        hVarSubstTrackDb(td, database);
        /* Blank the html only for the duration of the tab output. */
        char *hold = td->html;
        td->html = "";
        subChar(td->type, '\t', ' ');	/* Tabs confuse things. */
        subChar(td->shortLabel, '\t', ' ');	/* Tabs confuse things. */
        subChar(td->longLabel, '\t', ' ');	/* Tabs confuse things. */
        trackDbTabOut(td, f);
        td->html = hold;
        }
    carefulClose(&f);
    verbose(2, "Wrote tab representation to %s\n", tab);
    }

/* Update database */
    {
    char *create;
    size_t createLen;
    char query[256];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = substituteTrackName(create, trackDbName);
    /* Drop a trailing semicolon; the length check keeps an empty
     * definition from indexing before the start of the buffer. */
    createLen = strlen(create);
    if (createLen > 0 && create[createLen-1] == ';')
        create[createLen-1] = 0;
    sqlRemakeTable(conn, trackDbName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s",
             tab, trackDbName);
    verbose(2, "sending mysql \"%s\"\n", query);
    sqlUpdate(conn, query);
    verbose(2, "done tab file load\n");

    /* Load in html and settings fields. */
    for (td = tdbList; td != NULL; td = td->next)
        {
        if (isEmpty(td->html))
            {
            if (strict && !trackDbLocalSetting(td, "parent")
                && !trackDbLocalSetting(td, "superTrack")
                && !sameString(td->track,"cytoBandIdeo"))
                {
                fprintf(stderr, "Warning: html missing for %s %s %s '%s'\n",org, database,
                        td->track, td->shortLabel);
                }
            }
        else
            {
            updateBigTextField(conn, trackDbName, "tableName", td->track, "html", td->html);
            }
        if (td->settingsHash != NULL)
            {
            char *settings = settingsFromHash(td->settingsHash);
            updateBigTextField(conn, trackDbName, "tableName", td->track,
                               "settings", settings);
            if (showSettings)
                {
                verbose(1, "%s: type='%s';", td->track, td->type);
                if (isNotEmpty(settings))
                    {
                    char *oneLine = replaceChars(settings, "\n", "; ");
                    eraseTrailingSpaces(oneLine);
                    verbose(1, " %s", oneLine);
                    freeMem(oneLine);
                    }
                verbose(1, "\n");
                }
            freeMem(settings);
            }
        }

    sqlDisconnect(&conn);
    verbose(1, "Loaded database %s\n", database);
    }
}
static void gapToLift(char *db, char *outFile) /* gapToLift - create lift file from gap table(s). */ { FILE *out = mustOpen(outFile, "w"); struct sqlConnection *conn = sqlConnect(db); struct chromInfo *cInfoList = loadChromInfo(conn); struct agpGap *gapList = loadAllGaps(conn, db, cInfoList); struct agpGap *gap; int start = 0; int end = 0; char *prevChr = NULL; int liftCount = 0; int chrSize = 0; static struct hash *chrDone = NULL; chrDone = newHash(0); if (isNotEmpty(bedFileName)) { bedFile = mustOpen(bedFileName, "w"); verbose(2,"#\tbed output requested to %s\n", bedFileName); } for (gap = gapList; gap; gap = gap->next) { verbose(3,"#\t%s\t%d\t%d\t%s\n", gap->chrom, gap->chromStart, gap->chromEnd, gap->bridge); if (prevChr && sameWord(prevChr, gap->chrom)) { /* continuing same segment, check for gap break, * or gap at end of chrom */ if (sameWord("no",gap->bridge) || (gap->chromEnd == chrSize)) { end = gap->chromStart; liftCount = liftOutLine(out, gap->chrom, start, end, liftCount, chrSize); start = gap->chromEnd; end = start; } else end = gap->chromEnd; } else /* new chrom encountered */ { /* output last segment of previous chrom when necessary */ if (prevChr && differentWord(prevChr, gap->chrom)) { if (end < chrSize) liftCount = liftOutLine(out, prevChr, start, chrSize, liftCount, chrSize); } liftCount = 0; chrSize = hashIntVal(cInfoHash, gap->chrom); hashAddInt(chrDone, gap->chrom, 1); if (gap->chromStart > 0) { /* starting first segment at position 0 */ start = 0; end = gap->chromStart; /* does the first gap break it ? Or gap goes to end of chrom. 
*/ if (sameWord("no",gap->bridge) || (gap->chromEnd == chrSize)) { liftCount = liftOutLine(out, gap->chrom, start, end, liftCount, chrSize); start = gap->chromEnd; end = start; } } else /* first gap is actually the beginning of the chrom */ { /* thus, first segment starts after this first gap */ start = gap->chromEnd; end = start; } } prevChr = gap->chrom; /* remember prev chrom to detect next chrom */ } /* potentially a last one */ if (end < chrSize) liftCount = liftOutLine(out, prevChr, start, chrSize, liftCount, chrSize); /* check that all chroms have been used */ struct hashCookie cookie = hashFirst(cInfoHash); struct hashEl *hel; while ((hel = hashNext(&cookie)) != NULL) { if (NULL == hashLookup(chrDone, hel->name)) { chrSize = hashIntVal(cInfoHash, hel->name); verbose(2, "#\tno gaps on chrom: %s, size: %d\n", hel->name, chrSize); liftCount = liftOutLine(out, hel->name, 0, chrSize, 0, chrSize); } } carefulClose(&out); sqlDisconnect(&conn); }
void cdwJobCleanFailed(int submitId) /* Check out the symlink to determine its type. */ { struct sqlConnection *conn = sqlConnect("cdw"); struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select id, commandLine, startTime, endTime, returnCode, pid from cdwJob where submitId=%d " "order by commandLine,CAST(returnCode AS unsigned)", submitId); // NOTE we need this CAST on returnCode since it can be -1. we want success 0 first. // TODO DO we need to add any other conditions such as distinguishing // between running, queued, and done? /* Scan through result set finding redundant rows beyond success row. */ struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; char *lastCommand = ""; boolean success = FALSE; struct slInt *list = NULL; struct slInt *e; while ((row = sqlNextRow(sr)) != NULL) { unsigned int id = sqlUnsigned(row[0]); char *commandLine = row[1]; unsigned long startTime = sqlUnsignedLong(row[2]); unsigned long endTime = sqlUnsignedLong(row[3]); int returnCode = sqlSigned(row[4]); unsigned int pid = sqlUnsigned(row[5]); verbose(2, "%u\t%s\t%lu\t%lu\t%d\t%u\t%u\n", id, commandLine, startTime, endTime, returnCode, pid, submitId); if (sameString(lastCommand, commandLine)) { if (success) // we already succeeded, the old failure is unwanted baggage. { e = slIntNew(id); // or add it to a list of rows whose ids should get removed slAddHead(&list, e); } } else { if (returnCode == 0) success = TRUE; else success = FALSE; } // note fields pid and submitId are defined as signed integers in cdwJob table, probably should be unsigned. lastCommand = cloneString(commandLine); } sqlFreeResult(&sr); slReverse(&list); for(e=list;e;e=e->next) { dyStringClear(query); sqlDyStringPrintf(query, "delete from cdwJob where id=%u", (unsigned int) e->val); //printf("%s\n", query->string); sqlUpdate(conn, query->string); } /* Clean up and go home */ dyStringFree(&query); sqlDisconnect(&conn); }
/* Version for Zoo species */ boolean convertCoordinatesZoo(FILE *goodOut, FILE *badOut, void (*goodResult)(FILE *out, struct coordConvRep *report), void (*badResult)(FILE *out, struct coordConvRep *report)) /* tries to convert coordinates and prints report depending on function pointers provided. In generial goodResult and badResult either generate html or tesxt if we are in cgi or testing mode respectively. */ { struct blatServerTable *serve = NULL; struct coordConvRep *ccr = createCoordConvRep_mod(); struct dbDb *newDbRec = NULL, *oldDbRec = NULL; struct sqlConnection *conn = sqlConnect(origGenome); struct linkedFeatures *lfList = NULL, *lf; struct sqlResult *sr = NULL; boolean success = FALSE; /* Keeps track if we're in an inverted match or not */ boolean inversion = FALSE; /* Two possible reasons two fail */ boolean incoherent = FALSE; boolean max_apart= FALSE; char track[256]; char success_message[256]; char **row; int rowOffset; int conv_total=0; int iteration = 0; /* These two distances check how different the distance is between the converted and unconverted coordinates. In this case if the distance between a converted versus unconverted block is more than 10 times and greater than 10 000 bases, set up a warning... */ int ref_end=0,ref_start,comp_end=0,comp_start=0; /* Load info from databases into ccr */ oldDbRec = loadDbInformation_mod(origGenome); ccr->from->chrom = cloneString(chrom); ccr->from->chromStart = chromStart; ccr->from->chromEnd = chromEnd; ccr->from->version = cloneString(oldDbRec->name); ccr->from->date = cloneString(oldDbRec->description); ccr->from->nibDir = cloneString(oldDbRec->nibPath); ccr->seqSize=1000; newDbRec = loadDbInformation_mod(newGenome); ccr->to->version = cloneString(newDbRec->name); ccr->to->date = cloneString(newDbRec->description); ccr->to->nibDir = cloneString(newDbRec->nibPath); ccr->good=FALSE; /* Create the correct track name... Will have to be changed when multiple versions? 
*/ sprintf(track,"%s_%s",origGenome,newGenome); /* Get the information from loading the track. */ /* Double check we are not using a track connecting 1 and 2 */ if(!(strstr(track,"2") && strstr(track,"1"))) { sr = hRangeQuery(conn, track, chrom, chromStart, chromEnd, NULL, &rowOffset); } while ((row = sqlNextRow(sr)) != NULL) { /* Find the correponding track */ struct psl *psl = pslLoad(row+rowOffset); /* If first time through... */ if(iteration==0) { /* Fill in stuff if first time through... */ ccr->to->chrom=cloneString(psl->qName); ccr->to->chromStart=psl->qStart; /* Actual point of conversion of coordinates */ ccr->from->next->chromStart=psl->tStart; ccr->good=TRUE; success=TRUE; } /* check for erroneous conversion if not first time through */ /* Check for inversions, massive insertions... */ /* Check for inversion (old start is "bigger" than new start)*/ if(iteration > 0) { if((comp_start> psl->qStart)) { /* If not currently in an inversion state */ if(!inversion ) /* If not the second time through (first time inversion could be detected) */ if(iteration > 2) incoherent=TRUE; /* Reset variables used for measuring distance... */ /* Set inversion state variable to true */ inversion = TRUE; /* Check to see if there are too great distances ... 
*/ if( ((comp_start - psl->qEnd)>(10 * (psl->tStart - ref_end))) && ((comp_start - psl->qEnd) > 10000)) max_apart=TRUE; } else /* No inversion */ { /* Check if previous state was an inversion (then flip flop)...*/ if(inversion) incoherent = TRUE; else { /* Check to see if the mapping is too far apart */ if( ((psl->qStart - comp_end) > (10 * (psl->tStart - ref_end))) && ((psl->qStart - comp_end) > 10000)) max_apart=TRUE; } } } if(inversion) { if(iteration == 1) ccr->to->chromEnd=comp_end; ccr->to->chromStart=psl->qStart; } else ccr->to->chromEnd=psl->qEnd; ccr->from->next->chromEnd=psl->tEnd; if(max_apart || incoherent) { success=FALSE; break; } if(psl->tStart > ref_end) conv_total+=(psl->tEnd - psl->tStart); else conv_total+=(psl->tEnd - ref_end); ref_end=psl->tEnd; comp_end=psl->qEnd; ref_start=psl->tStart; comp_start=psl->qStart; iteration++; pslFree(&psl); } if(!success) { /* Check to see if using version two of zoo. Not integrated into the database at this stage... */ if((strstr(origGenome,"2") && strstr(newGenome,"1"))|| (strstr(newGenome,"2") && strstr(origGenome,"1"))) sprintf(success_message,"Couldn't convert between these two genomes since the cross conversion between the two zoo dataset hasn't been fully integrated into the database"); else if (max_apart) sprintf(success_message, "Coordinates couldn't reliably be converted between the two species. Try using a smaller window. 
"); else if (incoherent) sprintf(success_message, "Coordinates couldn't be converted due to inconsistent inversions."); else sprintf(success_message,"Couldn't find a corresponding region for the original genome to the new genome."); ccr->msg=cloneString(success_message); badResult(badOut,ccr); } else { sprintf(success_message,"Successfully converted (%3.1f%% of the original region was converted.)",((float)(conv_total * 100))/(float)(chromEnd-chromStart)); ccr->msg=cloneString(success_message); goodResult(goodOut,ccr); } dbDbFree(&oldDbRec); dbDbFree(&newDbRec); coordConvRepFreeList(&ccr); return success; }
void hgKnownMore(char *database, char *loc2ref, char *mim2loc, char *omimIds, char *nomeIds) /* hgKnownMore - Create the knownMore table from a variety of sources.. */ { struct hash *pgiHash = NULL; /* Hash of rsInfo indexed by gi. */ struct hash *locHash = NULL; /* Hash of rsInfo indexed by locusLink IDs. */ struct rsInfo *rs; struct hash *hmOmimHash = NULL, *hmSymbolHash = NULL; struct hash *mimHash = NULL; struct hugoMulti *hmList = NULL, *hm; struct hash *nameOmimHash = NULL, *omimNameHash = NULL; struct nameOmim *nameOmimList = NULL, *nameOmim; struct knownInfo *kiList = NULL, *ki; struct knownMore km; struct sqlConnection *conn; char *tabName = "knownMore.tab"; FILE *f = NULL; char *omimIdString = NULL; char query[256]; readLoc2ref(loc2ref, &pgiHash, &locHash); readMim(mim2loc, &mimHash); readHugoMultiTable(nomeIds, &hmList, &hmOmimHash, &hmSymbolHash); printf("Read %d elements in %s\n", slCount(hmList), nomeIds); readNameOmim(omimIds, &nameOmimList, &nameOmimHash, &omimNameHash); printf("Read %d elements in %s\n", slCount(nameOmimList), omimIds); conn = sqlConnect(database); kiList = loadKnownInfo(conn); printf("Read %d elements from knownInfo table in %s\n", slCount(kiList), database); printf("Writing %s\n", tabName); f = mustOpen(tabName, "w"); for (ki = kiList; ki != NULL; ki = ki->next) { /* Fill out a knownMore data structure. Start with all zero * just to avoid garbage. */ zeroBytes(&km, sizeof(km)); /* First fields come from knownInfo generally. */ km.name = ki->name; /* The name displayed in the browser: OMIM, gbGeneName, or transId */ km.transId = ki->transId; /* Transcript id. Genie generated ID. */ km.geneId = ki->geneId; /* Gene (not transcript) Genie ID */ km.gbGeneName = ki->geneName; /* Connect to geneName table. Genbank gene name */ km.gbProductName = ki->productName; /* Connects to productName table. 
Genbank product name */ km.gbProteinAcc = ki->proteinId; /* Genbank accession of protein */ km.gbNgi = ki->ngi; /* Genbank gi of nucleotide seq. */ km.gbPgi = ki->pgi; /* Genbank gi of protein seq. */ /* Fill in rest with acceptable values for no-data-present. */ km.omimId = 0; /* OMIM ID or 0 if none */ km.omimName = ""; /* OMIM primary name */ km.hugoId = 0; /* HUGO Nomeclature Committee ID or 0 if none */ km.hugoSymbol = ""; /* HUGO short name */ km.hugoName = ""; /* HUGO descriptive name */ km.hugoMap = ""; /* HUGO Map position */ km.pmId1 = 0; /* I have no idea - grabbed from a HUGO nomeids.txt */ km.pmId2 = 0; /* Likewise, I have no idea */ km.refSeqAcc = ""; /* Accession of RefSeq mRNA */ km.aliases = ""; /* Aliases if any. Comma and space separated list */ km.locusLinkId = 0; /* Locus link ID */ km.gdbId = ""; /* NCBI GDB database ID */ /* See if it's a disease gene with extra info. */ omimIdString = NULL; rs = hashFindVal(pgiHash, km.gbPgi); if (rs != NULL && rs->locusLinkId != NULL) { km.locusLinkId = atoi(rs->locusLinkId); omimIdString = hashFindVal(mimHash, rs->locusLinkId); } if (rs != NULL && rs->mrnaAcc != NULL) km.refSeqAcc = rs->mrnaAcc; if (omimIdString != NULL) { km.omimId = atoi(omimIdString); /* OMIM ID or 0 if none */ nameOmim = hashFindVal(omimNameHash, omimIdString); if (nameOmim != NULL) { km.name = km.omimName = nameOmim->name; } hm = hashFindVal(hmOmimHash, omimIdString); if (hm != NULL) { km.hugoId = hm->hgnc; /* HUGO Nomeclature Committee ID or 0 if none */ km.name = km.hugoSymbol = hm->symbol; /* HUGO short name */ km.hugoName = hm->name; /* HUGO descriptive name */ km.hugoMap = hm->map; /* HUGO Map position */ km.pmId1 = hm->pmId1; /* I have no idea - grabbed from a HUGO nomeids.txt */ km.pmId2 = hm->pmId2; /* Likewise, I have no idea */ km.refSeqAcc = hm->refSeqAcc; /* Accession of RefSeq mRNA */ km.aliases = hm->aliases; /* Aliases if any. 
Comma and space separated list */ km.locusLinkId = hm->locusLinkId; /* Locus link ID */ km.gdbId = hm->gdbId; /* NCBI GDB database ID */ } } knownMoreTabOut(&km, f); } carefulClose(&f); printf("Loading database %s\n", database); sqlUpdate(conn, "NOSQLINJ delete from knownMore"); sqlSafef(query, sizeof query, "load data local infile '%s' into table knownMore", tabName); sqlUpdate(conn, query); sqlDisconnect(&conn); }
void loadDatabase(char *database, char *tab, char *track) /* Load database from tab file. */ { struct sqlConnection *conn = sqlConnect(database); struct dyString *dy = newDyString(1024); /* First make table definition. */ if (sqlTable != NULL) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop of end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; sqlRemakeTable(conn, track, sql); freez(&sql); } else if (!oldTable) { /* Create definition statement. */ verbose(1, "Creating table definition for %s\n", track); sqlDyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " level int unsigned not null,\n"); dyStringAppend(dy, " tName varchar(255) not null,\n"); dyStringAppend(dy, " tStart int unsigned not null,\n"); dyStringAppend(dy, " tEnd int unsigned not null,\n"); dyStringAppend(dy, " strand char(1) not null,\n"); dyStringAppend(dy, " qName varchar(255) not null,\n"); dyStringAppend(dy, " qStart int unsigned not null,\n"); dyStringAppend(dy, " qEnd int unsigned not null,\n"); dyStringAppend(dy, " chainId int unsigned not null,\n"); dyStringAppend(dy, " ali int unsigned not null,\n"); dyStringAppend(dy, " score double not null,\n"); dyStringAppend(dy, " qOver int not null, \n"); dyStringAppend(dy, " qFar int not null, \n"); dyStringAppend(dy, " qDup int not null, \n"); dyStringAppend(dy, " type varchar(255) not null,\n"); dyStringAppend(dy, " tN int not null, \n"); dyStringAppend(dy, " qN int not null, \n"); dyStringAppend(dy, " tR int not null, \n"); dyStringAppend(dy, " qR int not null, \n"); dyStringAppend(dy, " tNewR int not null, \n"); dyStringAppend(dy, " qNewR int not null, \n"); dyStringAppend(dy, " tOldR int not null, \n"); dyStringAppend(dy, " qOldR int not null, \n"); dyStringAppend(dy, " tTrf int not null, \n"); dyStringAppend(dy, " qTrf int not null, \n"); dyStringAppend(dy, "#Indices\n"); if (!noBin) 
dyStringAppend(dy, " INDEX(tName(16),bin),\n"); dyStringAppend(dy, " INDEX(tName(16),tStart)\n"); dyStringAppend(dy, ")\n"); sqlRemakeTable(conn, track, dy->string); } dyStringClear(dy); sqlDyStringPrintf(dy, "load data local infile '%s' into table %s", tab, track); verbose(1, "Loading %s into %s\n", track, database); sqlUpdate(conn, dy->string); /* add a comment to the history table and finish up connection */ hgHistoryComment(conn, "Loaded net table %s", track); sqlDisconnect(&conn); }
struct sqlConnection *edwConnect()
/* Open and return a connection to the database named by edwDatabase.
 * NOTE(review): the original comment calls this a read only connection;
 * nothing visible here enforces that - confirm against sqlConnect. */
{
struct sqlConnection *conn = sqlConnect(edwDatabase);
return conn;
}
void spTest(char *database, char *someAcc) /* spTest - Test out sp library.. */ { struct sqlConnection *conn = sqlConnect(database); char *acc, *id, *binomial, *common; struct slName *geneList, *gene, *accList, *n, *list; struct slName *nameList, *name, *keyList, *key, *typeList, *type; struct spFeature *featList, *feat; struct spCitation *citeList, *cite; char *ret = NULL; int taxon; int classId = 0, typeId = 0, refId = 0; printf("input: %s\n", someAcc); acc = spLookupPrimaryAcc(conn, someAcc); printf("primary accession: %s\n", acc); id = spAccToId(conn, acc); printf("SwissProt id: %s\n", id); printf("acc from id: %s\n", spIdToAcc(conn, id)); ret = spOrganelle(conn, acc); printf("organelle: %s\n", (ret == NULL) ? "(null)" : ret); printf("isCurated: %d\n", spIsCurated(conn, acc)); printf("aaSize: %d\n", spAaSize(conn,acc)); printf("molWeight: %d\n", spMolWeight(conn,acc)); printf("createDate: %s\n", spCreateDate(conn,acc)); printf("seqDate: %s\n", spSeqDate(conn,acc)); printf("annDate: %s\n", spAnnDate(conn,acc)); printf("description: %s\n", spDescription(conn, acc)); taxon = spTaxon(conn, acc); printf("taxon: %d\n", taxon); binomial = spTaxonToBinomial(conn, taxon); printf("first scientific name: %s\n", binomial); common = spTaxonToCommon(conn, taxon); printf("first common name: %s\n", common); printf("taxon from sci: %d\n", spBinomialToTaxon(conn, binomial)); printf("taxon from common: %d\n", spCommonToTaxon(conn, common)); printf("all scientific names:"); nameList = spBinomialNames(conn, acc); for (name = nameList; name != NULL; name = name->next) printf(" %s,", name->name); printf("\n"); printf("gene(s):"); geneList = spGenes(conn,acc); for (gene=geneList; gene != NULL; gene = gene->next) printf(" %s,", gene->name); printf("\n"); for (gene=geneList; gene != NULL; gene = gene->next) { accList = spGeneToAccs(conn, gene->name, 0); printf(" any %s:", gene->name); for (n = accList; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); slFreeList(&accList); 
printf(" %s %s:", common, gene->name); accList = spGeneToAccs(conn, gene->name, taxon); for (n = accList; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); slFreeList(&accList); } slFreeList(&geneList); printf("keyword(s):"); keyList = spKeywords(conn, acc); for (key = keyList; key != NULL; key = key->next) printf(" %s,", key->name); printf("\n"); for (key = keyList; key != NULL; key = key->next) { accList = spKeywordSearch(conn, key->name, taxon); printPartialList(common, key->name, accList, 4); slFreeList(&accList); break; /* This is a little slow, once is enough. */ } for (key = keyList; key != NULL; key = key->next) { accList = spKeywordSearch(conn, key->name, 0); printPartialList("all", key->name, accList, 4); slFreeList(&accList); break; /* This is a little slow, once is enough. */ } slFreeList(&keyList); printf("All comments:\n"); list = slComments(conn, acc, NULL); for (n = list; n != NULL; n = n->next) printf(" %s\n", n->name); slFreeList(&list); typeList = slCommentTypes(conn); for (type = typeList; type != NULL; type = type->next) { list = slComments(conn, acc, type->name); if (list != NULL) { printf("%s comments:\n", type->name); for (n = list; n != NULL; n = n->next) printf(" %s\n", n->name); slFreeList(&list); } } slFreeList(&typeList); list = spEmblAccs(conn, acc); printf("GenBank/EMBL:"); for (n = list; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); if (list != NULL) printf("acc from %s: %s\n", list->name, spAccFromEmbl(conn, list->name)); slFreeList(&list); list = spPdbAccs(conn, acc); printf("PDB:"); for (n = list; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); featList = spFeatures(conn, acc, 0, 0); printf("All features:\n"); for (feat = featList; feat != NULL; feat = feat->next) { printFeat(conn, feat); classId = feat->featureClass; typeId = feat->featureType; } slFreeList(&featList); if (classId != 0 && typeId != 0) { printf("%s class features:\n", spFeatureClassName(conn, classId)); featList = 
spFeatures(conn, acc, classId, 0); for (feat = featList; feat != NULL; feat = feat->next) printFeat(conn, feat); slFreeList(&featList); printf("%s type features:\n", spFeatureTypeName(conn, typeId)); featList = spFeatures(conn, acc, 0, typeId); for (feat = featList; feat != NULL; feat = feat->next) printFeat(conn, feat); slFreeList(&featList); printf("same class & type features:\n"); featList = spFeatures(conn, acc, classId, typeId); for (feat = featList; feat != NULL; feat = feat->next) printFeat(conn, feat); slFreeList(&featList); printf("class loop: %d->%s->%d\n", classId, spFeatureClassName(conn, classId), spFeatureClassId(conn, spFeatureClassName(conn, classId))); printf("type loop: %d->%s->%d\n", typeId, spFeatureTypeName(conn, typeId), spFeatureTypeId(conn, spFeatureTypeName(conn, typeId))); } citeList = spCitations(conn, acc); for (cite = citeList; cite != NULL; cite = cite->next) { refId = cite->reference; printf("title: %s\n", spRefTitle(conn, refId)); printf("authors:"); list = spRefAuthors(conn, refId); for (n = list; n != NULL; n = n->next) printf(" %s, ", n->name); printf("\n"); slFreeList(&list); printf("location: %s\n", spRefCite(conn, refId)); printf("pubMed: %s\n", spRefPubMed(conn, refId)); } if (refId != 0) { printf("other accs associated with last reference:\n\t"); list = spRefToAccs(conn, refId); printPartialList("", "", list, 6); slFreeList(&list); } sqlDisconnect(&conn); }
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable) /* hgExpDistance - Create table that measures expression distance between pairs. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[256]; char **row; struct hash *expHash = hashNew(16); int realExpCount = -1; struct microData *geneList = NULL, *curGene, *gene; int geneIx, geneCount = 0; struct microData **geneArray = NULL; float *weights = NULL; char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, outTable); long time1, time2; time1 = clock1000(); /* Get list/hash of all items with expression values. */ /* uglyf("warning: temporarily limited to 1000 records\n"); */ sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; if (!hashLookup(expHash, name)) { int expCount = sqlUnsigned(row[1]); int commaCount; float *expScores = NULL; sqlFloatDynamicArray(row[2], &expScores, &commaCount); if (expCount != commaCount) errAbort("expCount and expScores don't match on %s in %s", name, posTable); if (realExpCount == -1) realExpCount = expCount; if (expCount != realExpCount) errAbort("In %s some rows have %d experiments others %d", name, expCount, realExpCount); AllocVar(gene); gene->expCount = expCount; gene->expScores = expScores; hashAddSaveName(expHash, name, gene, &gene->name); slAddHead(&geneList, gene); } } sqlFreeResult(&sr); conn = sqlConnect(database); slReverse(&geneList); geneCount = slCount(geneList); printf("Have %d elements in %s\n", geneCount, posTable); weights = getWeights(realExpCount); if (optionExists("lookup")) geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList); geneCount = slCount(geneList); printf("Got %d unique elements in %s\n", geneCount, posTable); sqlDisconnect(&conn); /* Disconnect because next step is slow. 
*/ if (geneCount < 1) errAbort("ERROR: unique gene count less than one ?"); time2 = clock1000(); verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0); /* Get an array for sorting. */ AllocArray(geneArray, geneCount); for (gene = geneList,geneIx=0; gene != NULL; gene = gene->next, ++geneIx) geneArray[geneIx] = gene; /* Print out closest 1000 in tab file. */ for (curGene = geneList; curGene != NULL; curGene = curGene->next) { calcDistances(curGene, geneList, weights); qsort(geneArray, geneCount, sizeof(geneArray[0]), cmpMicroDataDistance); for (geneIx=0; geneIx < 1000 && geneIx < geneCount; ++geneIx) { gene = geneArray[geneIx]; fprintf(f, "%s\t%s\t%f\n", curGene->name, gene->name, gene->distance); } dotOut(); } printf("Made %s.tab\n", outTable); time1 = time2; time2 = clock1000(); verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0); /* Create and load table. */ conn = sqlConnect(database); distanceTableCreate(conn, outTable); hgLoadTabFile(conn, tempDir, outTable, &f); printf("Loaded %s\n", outTable); /* Add indices. */ sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable); sqlUpdate(conn, query); printf("Made query index\n"); if (optionExists("targetIndex")) { sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable); sqlUpdate(conn, query); printf("Made target index\n"); } hgRemoveTabFile(tempDir, outTable); time1 = time2; time2 = clock1000(); verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0); }
void encode2Meta(char *database, char *manifestIn, char *outMetaRa)
/* encode2Meta - Create meta files.
 *   database   - must be one of the entries of the global metaDbs array;
 *                the function aborts otherwise.
 *   manifestIn - manifest file; only metadata objects whose fileName appears
 *                in it get a file node.
 *   outMetaRa  - output file the resulting tree is written to. */
{
int dbIx = stringArrayIx(database, metaDbs, ArraySize(metaDbs));
if (dbIx < 0)
    errAbort("Unrecognized database %s", database);

/* Create a three level meta.ra format file based on hgFixed.encodeExp
 * and database.metaDb tables. The levels are composite, experiment, file */
struct metaNode *metaTree = metaTreeNew("encode2");

/* Load up the manifest and index it by file name. */
struct encode2Manifest *mi, *miList = encode2ManifestShortLoadAll(manifestIn);
struct hash *miHash = hashNew(18);
for (mi = miList; mi != NULL; mi = mi->next)
    hashAdd(miHash, mi->fileName, mi);
verbose(1, "%d files in %s\n", miHash->elCount, manifestIn);

/* Load up encodeExp info. The connection is only needed for this query. */
struct sqlConnection *expConn = sqlConnect(expDb);
struct encodeExp *expList = encodeExpLoadByQuery(expConn, "NOSQLINJ select * from encodeExp");
sqlDisconnect(&expConn);
verbose(1, "%d experiments in encodeExp\n", slCount(expList));

/* Maps composite name -> composite metaNode. */
struct hash *compositeHash = hashNew(0);

/* Go through each organism database in turn.  Only the entry equal to
 * database is processed; i is also used below to index organisms[]. */
int i;
for (i=0; i<ArraySize(metaDbs); ++i)
    {
    char *db = metaDbs[i];
    if (!sameString(database, db))
        continue;

    verbose(1, "exploring %s\n", db);
    struct mdbObj *mdb, *mdbList = getMdbList(db);
    verbose(1, "%d meta objects in %s\n", slCount(mdbList), db);

    /* Get info on all composites: one child node of the root per object
     * whose objType is "composite", carrying all its vars plus "assembly". */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *objType = mdbVarLookup(mdb->vars, "objType");
        if (objType != NULL && sameString(objType, "composite"))
            {
            char compositeName[256];
            safef(compositeName, sizeof(compositeName), "%s", mdb->obj);
            struct metaNode *compositeNode = metaNodeNew(compositeName);
            slAddHead(&metaTree->children, compositeNode);
            compositeNode->parent = metaTree;
            struct mdbVar *v;
            for (v=mdb->vars; v != NULL; v = v->next)
                {
                metaNodeAddVar(compositeNode, v->var, v->val);
                }
            metaNodeAddVar(compositeNode, "assembly", db);
            hashAdd(compositeHash, mdb->obj, compositeNode);
            }
        }

    /* Make up one more for experiments with no composite. */
    char *noCompositeName = "wgEncodeZz";
    struct metaNode *noCompositeNode = metaNodeNew(noCompositeName);
    slAddHead(&metaTree->children, noCompositeNode);
    noCompositeNode->parent = metaTree;
    hashAdd(compositeHash, noCompositeName, noCompositeNode);

    /* Now go through objects trying to tie experiments to composites.
     * The first composite seen for a dccAccession wins; later different
     * ones are only reported at verbose level 2.
     * NOTE(review): composite may be NULL here when the var is absent -
     * confirm originalData() accepts NULL.
     * NOTE(review): the verbose format below has no trailing newline. */
    struct hash *expToComposite = hashNew(16);
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *composite = mdbVarLookup(mdb->vars, "composite");
        if (originalData(composite))
            {
            char *dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
            if (dccAccession != NULL)
                {
                char *oldComposite = hashFindVal(expToComposite, dccAccession);
                if (oldComposite != NULL)
                    {
                    if (!sameString(oldComposite, composite))
                        verbose(2, "%s maps to %s ignoring mapping to %s", dccAccession, oldComposite, composite);
                    }
                else
                    {
                    hashAdd(expToComposite, dccAccession, composite);
                    }
                }
            }
        }

    /* Now get info on all experiments in this organism.  Each experiment
     * with an accession becomes a child of its composite node, or of the
     * placeholder node when no composite was recorded for it. */
    struct hash *expHash = hashNew(0);
    struct encodeExp *exp;
    for (exp = expList; exp != NULL; exp = exp->next)
        {
        if (sameString(exp->organism, organisms[i]))
            {
            if (exp->accession != NULL)
                {
                char *composite = hashFindVal(expToComposite, exp->accession);
                struct metaNode *compositeNode;
                if (composite != NULL)
                    {
                    compositeNode = hashMustFindVal(compositeHash, composite);
                    }
                else
                    {
                    compositeNode = noCompositeNode;
                    }
                struct metaNode *expNode = wrapNodeAroundExp(exp);
                hashAdd(expHash, expNode->name, expNode);
                slAddHead(&compositeNode->children, expNode);
                expNode->parent = compositeNode;
                }
            }
        }

    /* Attach file-level nodes.  Composite objects and objects without a
     * dccAccession are skipped; an accession with no recorded composite
     * aborts. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *fileName = NULL, *dccAccession = NULL;
        char *objType = mdbVarLookup(mdb->vars, "objType");
        if (objType != NULL && sameString(objType, "composite"))
            continue;
        dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
        if (dccAccession == NULL)
            continue;
        char *composite = hashFindVal(expToComposite, dccAccession);
        if (composite == NULL)
            errAbort("Can't find composite for %s", mdb->obj);
        /* Rewrite the fileName var in place to db/composite/firstFile; the
         * stored value itself is truncated at the first comma. */
        struct mdbVar *v;
        for (v = mdb->vars; v != NULL; v = v->next)
            {
            char *var = v->var, *val = v->val;
            if (sameString("fileName", var))
                {
                fileName = val;
                char path[PATH_LEN];
                char *comma = strchr(fileName, ',');
                if (comma != NULL)
                    *comma = 0;  /* Cut off comma separated list. */
                safef(path, sizeof(path), "%s/%s/%s", db, composite, fileName);  /* Add database path */
                fileName = val = v->val = cloneString(path);
                }
            }
        /* Only files listed in the manifest, whose experiment node exists,
         * get a node. */
        if (fileName != NULL)
            {
            if (hashLookup(miHash, fileName))
                {
                struct metaNode *expNode = hashFindVal(expHash, dccAccession);
                if (expNode != NULL)
                    {
                    struct metaNode *fileNode = metaNodeNew(mdb->obj);
                    slAddHead(&expNode->children, fileNode);
                    fileNode->parent = expNode;
                    /* NOTE(review): this v shadows the v declared above in
                     * the enclosing loop body. */
                    struct mdbVar *v;
                    for (v=mdb->vars; v != NULL; v = v->next)
                        {
                        metaNodeAddVar(fileNode, v->var, v->val);
                        }
                    }
                }
            }
        }
#ifdef SOON
#endif /* SOON */
    }

/* Post-process the tree, then write each child of the root in turn. */
struct hash *suppress = makeSuppress();
struct hash *closeEnoughTags = makeCloseEnoughTags();
metaTreeHoist(metaTree, closeEnoughTags);
metaTreeSortChildrenSortTags(metaTree);
FILE *f = mustOpen(outMetaRa, "w");
struct metaNode *node;
for (node = metaTree->children; node != NULL; node = node->next)
    metaTreeWrite(0, 0, BIGNUM, FALSE, NULL, node, suppress, f);
carefulClose(&f);

/* Write warning about tags in highest parent.  The root itself is not
 * written above, so its vars do not appear in the output file. */
struct mdbVar *v;
for (v = metaTree->vars; v != NULL; v = v->next)
    verbose(1, "Omitting universal %s %s\n", v->var, v->val);
}
void doStandard(struct cart *theCart) { cart = theCart; cartWebStart(cart, database, "ENCODE DCC Submissions"); struct sqlConnection *conn = sqlConnect(database); struct docIdSub *docIdSub; char query[10 * 1024]; struct sqlResult *sr; char **row; struct tempName tn; trashDirFile(&tn, "docId", "meta", ".txt"); char *tempFile = tn.forCgi; //printf("tempFile is %s\n<BR>", tempFile); // <Data type> <Cell Type> <Key Metadata> <View> printf("<table border=1><tr>"); printf("<th>dataType</th>"); printf("<th>cell type</th>"); printf("<th>metadata</th>"); printf("<th>view</th>"); printf("<th>fileType</th>"); printf("<th>file</th>"); printf("<th>lab</th>"); printf("<th>assembly</th>"); printf("<th>subId</th>"); printf("<th>val-report</th>"); printf("</tr>\n"); safef(query, sizeof query, "select * from %s", docIdTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { docIdSub = docIdSubLoad(row); verbose(2, "ix %d\n", docIdSub->ix); verbose(2, "submitDate %s\n", docIdSub->submitDate); verbose(2, "md5sum %s\n", docIdSub->md5sum); verbose(2, "valReport %s\n", docIdSub->valReport); verbose(2, "metaData %s\n", docIdSub->metaData); verbose(2, "submitPath %s\n", docIdSub->submitPath); verbose(2, "submitter %s\n", docIdSub->submitter); cgiDecode(docIdSub->metaData, docIdSub->metaData, strlen(docIdSub->metaData)); //printf("tempFile %s\n", tempFile); FILE *f = mustOpen(tempFile, "w"); fwrite(docIdSub->metaData, strlen(docIdSub->metaData), 1, f); fclose(f); boolean validated; struct mdbObj *mdbObj = mdbObjsLoadFromFormattedFile(tempFile, &validated); unlink(tempFile); // <Data type> <Cell Type> <Key Metadata> <View> char *docIdType = mdbObjFindValue(mdbObj, "type"); char *docIdComposite = mdbObjFindValue(mdbObj, "composite"); char buffer[10 * 1024]; safef(buffer, sizeof buffer, "%d", docIdSub->ix); if (sameString(database, "encpipeline_beta")) docIdDir = docIdDirBeta; printf("<tr>"); printf("<td>%s</td> ", mdbObjFindValue(mdbObj, "dataType")); 
printf("<td>%s</td> ", mdbObjFindValue(mdbObj, "cell")); struct dyString *str = newDyString(100); addValue(str, mdbObjFindValue(mdbObj, "antibody")); addValue(str, mdbObjFindValue(mdbObj, "treatment")); addValue(str, mdbObjFindValue(mdbObj, "rnaExtract")); addValue(str, mdbObjFindValue(mdbObj, "localization")); printf("<td>%s<a href=docIdView?docId=%s&db=%s&meta=\"\"> ...</a></td>", str->string,buffer, database); freeDyString(&str); printf("<td>%s</td> ", mdbObjFindValue(mdbObj, "view")); printf("<td>%s</td> ", mdbObjFindValue(mdbObj, "type")); printf("<td><a href=%s> %s</a></td>", docIdGetPath(buffer, docIdDir, docIdType, NULL) , docIdDecorate(docIdComposite,docIdSub->ix)); char *lab = mdbObjFindValue(mdbObj, "lab"); char *subId = mdbObjFindValue(mdbObj, "subId"); printf("<td><a href=docIdView?docId=%s&db=%s&lab=\"%s\"> %s</a></td>",buffer, database, subId, lab); printf("<td>%s</td> ", mdbObjFindValue(mdbObj, "assembly")); printf("<td>%s</td> ", subId); printf("<td><a href=docIdView?docId=%s&db=%s&report=\"\"> report</a></td>", buffer, database); printf("</tr>\n"); } printf("</table>"); sqlFreeResult(&sr); sqlDisconnect(&conn); cartWebEnd(); }
void hgLoadRnaFold(char *database, char *table, char *foldDir) /* hgLoadRnaFold - Load a directory full of RNA fold files into database. */ { char path[PATH_LEN]; struct slName *dirList, *dirEl; struct lineFile *lf; char *line, *word, *s, c; FILE *f = hgCreateTabFile(tabDir, table); int count = 0; dirList = listDir(foldDir, "*"); for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next) { char *name = dirEl->name; if (sameString(name, "CVS")) continue; safef(path, sizeof(path), "%s/%s", foldDir, name); lf = lineFileOpen(path, TRUE); if (!lineFileNext(lf, &line, NULL)) { if (warnEmpty) { warn("%s is empty, skipping\n", name); lineFileClose(&lf); continue; } else errAbort("%s is empty\n", name); } if (!isupper(line[0])) notFold(path, 1); fprintf(f, "%s\t", name); /* Save name */ fprintf(f, "%s\t", line); /* Save sequence */ lineFileNeedNext(lf, &line, NULL); c = line[0]; if (c != '.' && c != '(') notFold(path, 2); word = nextWord(&line); fprintf(f, "%s\t", word); /* Save nested parenthesis */ /* Parse out (energy) term at end of line. */ s = strchr(line, '('); if (s == NULL) notFold(path, 3); word = skipLeadingSpaces(s+1); if (word == NULL || (!word[0] == '-' && !isdigit(word[0]))) notFold(path, 4); if ((s = strchr(word, ')')) == NULL) notFold(path, 5); *s = 0; fprintf(f, "%s\n", word); lineFileClose(&lf); ++count; } printf("Parsed %d files\n", count); if (doLoad) { struct sqlConnection *conn = sqlConnect(database); rnaFoldCreateTable(conn, table); hgLoadTabFile(conn, tabDir, table, &f); hgRemoveTabFile(tabDir, table); sqlDisconnect(&conn); } }
void hgWaba(char *database, char *species, char *chromosome, int chromOffset, int wabaFileCount, char *wabaFile[]) /* hgWaba - load Waba alignments into database. */ { struct sqlConnection *conn = sqlConnect(database); FILE *fullTab, *chromTab; FILE *in; struct xaAli *xa, *xaList = NULL; char fullTabName[512], chromTabName[512]; char fullTable[128], chromTable[128]; char *inFile; int i; struct dyString *query = newDyString(2048); /* Loop through each waba file grabbing sequence into * memory, then sort. */ for (i = 0; i < wabaFileCount; ++i) { inFile = wabaFile[i]; printf("Processing %s\n", inFile); in = xaOpenVerify(inFile); while ((xa = xaReadNext(in, FALSE)) != NULL) { xa->tStart += chromOffset; xa->tEnd += chromOffset; slAddHead(&xaList, xa); } carefulClose(&in); } printf("Sorting %d alignments by chromosome position\n", slCount(xaList)); slSort(&xaList, xaAliCmpTstart); /* Create names of tables and the tables themselves. * Clear anything in the chrom table. */ sprintf(fullTable, "waba%s", species); sprintf(chromTable, "%s_waba%s", chromosome, species); dyStringClear(query); sqlDyStringPrintf(query, wabaFullCreate, fullTable); sqlMaybeMakeTable(conn, fullTable, query->string); dyStringClear(query); sqlDyStringPrintf(query, wabaChromCreate, chromTable); sqlMaybeMakeTable(conn, chromTable, query->string); if (chromOffset == 0) { dyStringClear(query); sqlDyStringPrintf(query, "DELETE from %s", chromTable); sqlUpdate(conn, query->string); } /* Make a temp file for each table we'll update. */ strcpy(fullTabName, "full_waba.tab"); fullTab = mustOpen(fullTabName, "w"); strcpy(chromTabName, "chrom_waba.tab"); chromTab = mustOpen(chromTabName, "w"); /* Write out tab-delimited files. 
*/ printf("Writing tab-delimited files\n"); for (xa = xaList; xa != NULL; xa = xa->next) { int squeezedSize; squeezedSize = squeezeSym(xa->tSym, xa->hSym, xa->symCount, xa->hSym); if( squeezedSize != xa->tEnd - xa->tStart ) { printf("%s squeezedSize: %d, tEnd, tStart: %d, %d, diff: %d\n", xa->query, squeezedSize, xa->tEnd, xa->tStart, xa->tEnd - xa->tStart ); } else { fprintf(fullTab, "%s\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n", /*xa->query, xa->qStart, xa->qEnd, xa->qStrand,*/ xa->name, xa->qStart, xa->qEnd, xa->qStrand, chromosome, xa->tStart, xa->tEnd, xa->milliScore, xa->symCount, xa->qSym, xa->tSym, xa->hSym); assert(squeezedSize == xa->tEnd - xa->tStart); fprintf(chromTab, "%s\t%d\t%d\t%c\t%d\t%s\n", /*xa->query, xa->tStart, xa->tEnd, xa->qStrand,*/ xa->name, xa->tStart, xa->tEnd, xa->qStrand, xa->milliScore, xa->hSym); } } fclose(fullTab); fclose(chromTab); printf("Loading %s table in %s\n", chromTable, database); dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", chromTabName, chromTable); sqlUpdate(conn, query->string); printf("Loading %s table in %s\n", fullTable, database); dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", fullTabName, fullTable); sqlUpdate(conn, query->string); printf("Done!\n"); // remove(fullTabName); // remove(chromTabName); sqlDisconnect(&conn); freeDyString(&query); }
void knownToVisiGene(char *database) /* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables. */ { char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, outTable); struct sqlConnection *hConn = sqlConnect(database); struct sqlConnection *iConn = sqlConnect(visiDb); struct sqlResult *sr; char **row; struct hash *geneImageHash = newHash(18); struct hash *locusLinkImageHash = newHash(18); struct hash *refSeqImageHash = newHash(18); struct hash *genbankImageHash = newHash(18); struct hash *probeImageHash = newHash(18); struct hash *knownToLocusLinkHash = newHash(18); struct hash *knownToRefSeqHash = newHash(18); struct hash *knownToGeneHash = newHash(18); struct hash *favorHugoHash = newHash(18); struct hash *knownToProbeHash = newHash(18); struct hash *knownToAllProbeHash = newHash(18); struct genePred *knownList = NULL, *known; struct hash *dupeHash = newHash(17); probesDb = optionVal("probesDb", database); struct sqlConnection *probesConn = sqlConnect(probesDb); vgProbes = sqlTableExists(probesConn,"vgProbes"); vgAllProbes = sqlTableExists(probesConn,"vgAllProbes"); /* Go through and make up hashes of images keyed by various fields. 
*/ sr = sqlGetResult(iConn, NOSQLINJ "select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank" ",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id" " from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap" " where image.imageFile = imageFile.id" " and image.id = imageProbe.image" " and imageProbe.probe = probe.id" " and probe.gene = gene.id" " and image.submissionSet=submissionSet.id" " and vgPrbMap.probe = probe.id"); while ((row = sqlNextRow(sr)) != NULL) { int id = sqlUnsigned(row[0]); float priority = atof(row[1]); int privateUser = sqlSigned(row[7]); char vgPrb_Id[256]; safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]); int geneId = sqlUnsigned(row[9]); if (privateUser == 0) { addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id); addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]); addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]); addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]); addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]); } } verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d" ", genbankImageHash %d probeImageHash %d\n", geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, genbankImageHash->elCount, probeImageHash->elCount); sqlFreeResult(&sr); /* Build up list of known genes. */ sr = sqlGetResult(hConn, NOSQLINJ "select * from knownGene"); while ((row = sqlNextRow(sr)) != NULL) { struct genePred *known = genePredLoad(row); if (!hashLookup(dupeHash, known->name)) { hashAdd(dupeHash, known->name, NULL); slAddHead(&knownList, known); } } slReverse(&knownList); sqlFreeResult(&sr); verbose(2, "Got %d known genes\n", slCount(knownList)); /* Build up hashes from knownGene to other things. 
*/ if (vgProbes) bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash); if (vgAllProbes) bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash); foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE); foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE); foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE); foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE); foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE); verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount, knownToProbeHash->elCount, knownToAllProbeHash->elCount); /* Try and find an image for each gene. */ for (known = knownList; known != NULL; known = known->next) { char *name = known->name; struct prioritizedImage *best = NULL; { best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash); if (!best) best = bestImage(name, knownToRefSeqHash, refSeqImageHash); if (!best) { best = hashFindVal(genbankImageHash, name); } if (!best) best = bestImage(name, knownToGeneHash, geneImageHash); if (vgProbes && !best) best = bestImage(name, knownToProbeHash, probeImageHash); if (vgAllProbes && !best) best = bestImage(name, knownToAllProbeHash, probeImageHash); } if (best) { fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId); } } createTable(hConn, outTable); hgLoadTabFile(hConn, tempDir, outTable, &f); hgRemoveTabFile(tempDir, outTable); }
struct genePred *getOverlappingGeneDb(struct genePred **list, char *table, char *chrom, int cStart, int cEnd, char *name, int *retOverlap, char *db) { /* read all genes from a table find the gene with the biggest overlap. Cache the list of genes to so we only read it once */ struct genePred *el = NULL, *bestMatch = NULL, *gp = NULL; int overlap = 0 , bestOverlap = 0, i; int *eFrames; if (list == NULL) return NULL; if (*list == NULL) { struct genePred *gpList = NULL; struct sqlConnection *conn = sqlConnect(db); struct genePredReader *gpr = NULL; if (!hTableExistsDb(db,table)) table = altTable; if (!hTableExistsDb(db,table)) { verbose(2,"no table %s in %s\n",table, db); return NULL; } gpr = genePredReaderQuery(conn, table, NULL); verbose(1,"Loading Predictions from %s in %s\n",table, db); gpList = genePredReaderAll(gpr); if (gpList != NULL) { hashAdd(geneListHash, db, gpList); *list = gpList; } sqlDisconnect(&conn); } for (el = *list; el != NULL; el = el->next) { if (chrom != NULL && el->chrom != NULL) { overlap = 0; if ( sameString(chrom, el->chrom)) { for (i = 0 ; i<(el->exonCount); i++) { overlap += positiveRangeIntersection(cStart,cEnd, el->exonStarts[i], el->exonEnds[i]) ; } if (overlap > 20 && sameString(name, el->name)) { bestMatch = el; bestOverlap = overlap; *retOverlap = bestOverlap; } if (overlap > bestOverlap) { bestMatch = el; bestOverlap = overlap; *retOverlap = bestOverlap; } } } } if (bestMatch != NULL) { /* Allocate genePred and fill in values. 
*/ AllocVar(gp); gp->name = cloneString(bestMatch->name); gp->chrom = cloneString(bestMatch->chrom); gp->strand[1] = bestMatch->strand[1]; gp->strand[0] = bestMatch->strand[0]; gp->txStart = bestMatch->txStart; gp->txEnd = bestMatch->txEnd; gp->cdsStart = bestMatch->cdsStart; gp->cdsEnd = bestMatch->cdsEnd; gp->exonCount = bestMatch->exonCount; AllocArray(gp->exonStarts, bestMatch->exonCount); AllocArray(gp->exonEnds, bestMatch->exonCount); for (i=0; i<bestMatch->exonCount; ++i) { gp->exonStarts[i] = bestMatch->exonStarts[i] ; gp->exonEnds[i] = bestMatch->exonEnds[i] ; } gp->optFields = bestMatch->optFields; gp->id = bestMatch->id; if (bestMatch->optFields & genePredName2Fld) gp->name2 = cloneString(bestMatch->name2); else gp->name2 = NULL; if (bestMatch->optFields & genePredCdsStatFld) { gp->cdsStartStat = bestMatch->cdsStartStat; gp->cdsEndStat = bestMatch->cdsEndStat; } if (bestMatch->optFields & genePredExonFramesFld) { gp->exonFrames = AllocArray(eFrames, bestMatch->exonCount); for (i = 0; i < bestMatch->exonCount; i++) gp->exonFrames[i] = bestMatch->exonFrames[i]; } eFrames = gp->exonFrames; } return gp; }
void txGeneColor(char *uniProtDb, char *infoFile, char *pickFile, char *outFile) /* txGeneColor - Figure out color to draw gene in.. */ { /* Load picks into hash. We don't use cdsPicksLoadAll because empty fields * cause that autoSql-generated routine problems. */ struct hash *pickHash = newHash(18); struct cdsPick *pick; struct lineFile *lf = lineFileOpen(pickFile, TRUE); char *row[CDSPICK_NUM_COLS]; while (lineFileRowTab(lf, row)) { pick = cdsPickLoad(row); hashAdd(pickHash, pick->name, pick); } /* Open uniprot database connection. */ struct sqlConnection *uConn = sqlConnect(uniProtDb); #ifdef OLD /* Figure out our light and medium colors. */ mediumBlue.r = (6*trueBlue.r + 4*255)/10; mediumBlue.g = (6*trueBlue.g + 4*255)/10; mediumBlue.b = (6*trueBlue.b + 4*255)/10; lightBlue.r = (1*trueBlue.r + 2*255)/3; lightBlue.g = (1*trueBlue.g + 2*255)/3; lightBlue.b = (1*trueBlue.b + 2*255)/3; #endif /* OLD */ /* Read in info file, and loop through it to make out file. */ struct txInfo *info, *infoList = txInfoLoadAll(infoFile); FILE *f = mustOpen(outFile, "w"); for (info = infoList; info != NULL; info = info->next) { struct rgbColor *col; pick = hashFindVal(pickHash, info->name); if (pick != NULL) { char *source = pick->source; if (sameString(source, "RefPepValidated")) col = &trueBlue; else if (sameString(source, "ccds")) col = &trueBlue; else if (sameString(source, "RefPepReviewed")) col = &trueBlue; else if (sameString(source, "RefSeqValidated")) col = &trueBlue; else if (sameString(source, "RefSeqReviewed")) col = &trueBlue; else if (sameString(source, "swissProt")) col = &trueBlue; else if (startsWith("Ref", source)) col = &mediumBlue; else col = &lightBlue; if (pick->swissProt[0] != 0) { char *acc = spLookupPrimaryAcc(uConn, pick->swissProt); struct slName *pdbList = spPdbAccs(uConn, acc); if (pdbList != NULL) col = &black; slFreeList(&pdbList); } } else col = &lightBlue; fprintf(f, "%s\t%d\t%d\t%d\n", info->name, col->r, col->g, col->b); } carefulClose(&f); }
void docIdTidy(char *database, char *docIdDir) /* docIdTidy - tidy up the docId library by compressing and md5suming where appropriate. */ { char query[10 * 1024]; struct sqlResult *sr; char **row; struct sqlConnection *conn = sqlConnect(database); struct tempName tn; trashDirFile(&tn, "docId", "meta", ".txt"); char *tempFile = tn.forCgi; struct toDoList *toDoList = NULL; safef(query, sizeof query, "select * from %s", docIdTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct docIdSub *docIdSub = docIdSubLoad(row); cgiDecode(docIdSub->metaData, docIdSub->metaData, strlen(docIdSub->metaData)); FILE *f = mustOpen(tempFile, "w"); fwrite(docIdSub->metaData, strlen(docIdSub->metaData), 1, f); fclose(f); boolean validated; //printf("metadata is %s\n", docIdSub->metaData); struct mdbObj *mdbObj = mdbObjsLoadFromFormattedFile(tempFile, &validated); unlink(tempFile); char *docIdType = mdbObjFindValue(mdbObj, "type"); char buffer[10 * 1024]; safef(buffer, sizeof buffer, "%d", docIdSub->ix); char *path = docIdGetPath(buffer, docIdDir, docIdType, NULL); //docIdDecorate(docIdSub->ix)); printf("path %s\n", path); struct toDoList *toDoItem = NULL; if (!fileIsCompressed(path)) { if (toDoItem == NULL) AllocVar(toDoItem); printf("foo\n"); toDoItem->needs |= NEEDS_COMPRESSION; toDoItem->path = path; toDoItem->docId = docIdSub->ix; } printf("docId %d md5sum %s valReport %s\n",docIdSub->ix, docIdSub->md5sum, docIdSub->valReport); //if (docIdSub->md5sum == NULL) if (sameString(docIdSub->md5sum, "")) { printf("mdsum\n"); if (toDoItem == NULL) AllocVar(toDoItem); toDoItem->needs |= NEEDS_MD5SUM; toDoItem->path = path; toDoItem->docId = docIdSub->ix; } //if (docIdSub->valReport == NULL) if (sameString(docIdSub->valReport,"")) { printf("report\n"); if (toDoItem == NULL) AllocVar(toDoItem); toDoItem->needs |= NEEDS_REPORT; toDoItem->path = path; toDoItem->docId = docIdSub->ix; } if (toDoItem) slAddHead(&toDoList, toDoItem); } sqlFreeResult(&sr); 
doCompression(toDoList); doReports(toDoList); doMd5Summing(toDoList); }