struct slName *slComments(struct sqlConnection *conn,
        char *acc,      /* Primary accession. */
        char *type)     /* Comment type name, NULL for all comments. */
/* Get list of comments associated with accession.
 * If the optional type parameter is given it should be something in the
 * commentType table.  Some good types are: DISEASE, FUNCTION,
 * "SUBCELLULAR LOCATION" etc.  The type name is first resolved to its id
 * through sqlNeedQuickNum, and only comments of that type are returned.
 * The result is whatever sqlQuickList yields for the final query.
 *
 * Queries are built with sqlSafef, like the other query builders in this
 * code base, so that quote characters in acc or type cannot break or alter
 * the SQL statement. */
{
char query[256];
if (type == NULL)
    {
    sqlSafef(query, sizeof(query),
        "select commentVal.val from comment,commentVal "
        "where comment.acc = '%s' "
        "and comment.commentVal = commentVal.id"
        , acc);
    }
else
    {
    int typeId;
    /* Translate the type name into the numeric id stored in comment.commentType. */
    sqlSafef(query, sizeof(query),
        "select id from commentType where val = '%s'", type);
    typeId = sqlNeedQuickNum(conn, query);
    sqlSafef(query, sizeof(query),
        "select commentVal.val from comment,commentVal "
        "where comment.acc = '%s' "
        "and comment.commentType = %d "
        "and comment.commentVal = commentVal.id "
        , acc, typeId);
    }
return sqlQuickList(conn, query);
}
int hubTrackSettings(char *hubUrl, struct hash *totals) /* Read hub trackDb files, noting settings used. If hubUrl is NULL, do this for * all public hubs */ { if (hubUrl != NULL) return oneHubTrackSettings(hubUrl, totals); // Get all urls from hubPublic table struct sqlConnection *conn = hConnectCentral(); char query[1024]; // NOTE: don't bother with site-local names for these tables /* sqlSafef(query, sizeof(query), "select hubUrl from %s where hubUrl not in (select hubUrl from %s)\n", defaultHubPublicTableName, defaultHubStatusTableName); */ sqlSafef(query, sizeof(query), "select hubUrl from %s where shortLabel not like '%%Test%%'", defaultHubPublicTableName); struct slName *hub, *hubs = sqlQuickList(conn, query); int errs = 0; for (hub = hubs; hub != NULL; hub = hub->next) { errs += oneHubTrackSettings(hub->name, totals); } if (totals) printCounts(totals); return errs; }
struct slName *spKeywordSearch(struct sqlConnection *conn, char *keyword,
        int taxon)
/* Return list of accessions that use keyword.  If taxon is non-zero
 * then restrict accessions to given organism.  Returns NULL when the
 * keyword lookup yields id 0.
 *
 * Queries are built with sqlSafef so that quote characters in keyword
 * cannot break or alter the SQL statement. */
{
char query[256];
int kwId;

/* Resolve the keyword text to its numeric id first. */
sqlSafef(query, sizeof(query),
    "select id from keyword where val = '%s'", keyword);
kwId = sqlQuickNum(conn, query);
if (kwId == 0)
    return NULL;

if (taxon == 0)
    {
    sqlSafef(query, sizeof(query),
        "select acc from accToKeyword where keyword = %d", kwId);
    }
else
    {
    sqlSafef(query, sizeof(query),
        "select accToKeyword.acc from accToKeyword,accToTaxon "
        "where accToKeyword.keyword = %d "
        "and accToKeyword.acc = accToTaxon.acc "
        "and accToTaxon.taxon = %d"
        , kwId, taxon);
    }
return sqlQuickList(conn, query);
}
struct genoGraph *getCompGraphs(struct sqlConnection *conn) /* Get graphs defined in database that are part of a composite. */ { struct genoGraph *list = NULL, *gg; struct sqlConnection *conn2 = hAllocConn(database); struct slName *compositeGGList = NULL, *comp; /* Get initial information from metaChromGraph table */ if (sqlTableExists(conn, "metaChromGraph")) compositeGGList = sqlQuickList(conn, "select name from metaChromGraph where binaryFile='composite'"); /* Build a hash of genoGraphs out of composite trackDbs and fill in from cart. */ for (comp = compositeGGList; comp != NULL; comp = comp->next) { struct trackDb *tdb = hTrackDbForTrack(database, comp->name); if (tdb) { struct chromGraphSettings *cgs = chromGraphSettingsGet(comp->name, conn2, tdb, cart); AllocVar(gg); gg->name = cloneString(comp->name); gg->shortLabel = tdb->shortLabel; gg->longLabel = tdb->longLabel; gg->settings = cgs; gg->isSubGraph = FALSE; gg->isComposite = TRUE; slAddHead(&list, gg); } } hFreeConn(&conn2); slReverse(&list); return list; }
struct slName *slCommentTypes(struct sqlConnection *conn)
/* Return every value in the val column of the commentType table. */
{
char sql[256];
struct slName *typeList;

safef(sql, sizeof(sql), "select val from commentType");
typeList = sqlQuickList(conn, sql);
return typeList;
}
struct slName *spRefToAccs(struct sqlConnection *conn, int refId)
/* Return the accessions that the citation table links to reference refId. */
{
char sql[256];
struct slName *accList;

safef(sql, sizeof(sql),
    "select acc from citation where reference = %d", refId);
accList = sqlQuickList(conn, sql);
return accList;
}
struct slName *spTaxons(struct sqlConnection *conn, char *acc)
/* Return list of taxons associated with accession, as found in the
 * accToTaxon table.
 *
 * The query is built with sqlSafef so that quote characters in acc
 * cannot break or alter the SQL statement. */
{
char query[256];
sqlSafef(query, sizeof(query),
    "select taxon from accToTaxon where acc = '%s'", acc);
return sqlQuickList(conn, query);
}
struct slName *spGenes(struct sqlConnection *conn, char *acc)
/* Return list of genes associated with accession, as found in the
 * gene table.
 *
 * The query is built with sqlSafef so that quote characters in acc
 * cannot break or alter the SQL statement. */
{
char query[256];
sqlSafef(query, sizeof(query),
    "select val from gene where acc = '%s'", acc);
return sqlQuickList(conn, query);
}
void dbFindFieldsWith(char *database, char *regExp, char *output) /* dbFindFieldsWith - Look through database and find fields that have elements matching a certain pattern in the first N rows.. */ { regex_t re; int err = regcomp(&re, regExp, REG_NOSUB|REG_EXTENDED); if (err < 0) errAbort("regcomp failed code %d", err); struct sqlConnection *conn = sqlConnect(database); struct slName *table, *tableList = sqlQuickList(conn, "NOSQLINJ show tables"); FILE *f = mustOpen(output, "w"); for (table = tableList; table != NULL; table = table->next) { char query[256]; sqlSafef(query, sizeof(query), "select * from %s limit %d", table->name, maxRows); verbose(2, "%s.%s\n", database, table->name); struct sqlResult *sr = sqlGetResult(conn, query); if (sr != NULL) { int colCount = sqlCountColumns(sr); /* Get labels for columns */ char **labels; AllocArray(labels, colCount); int i; for (i=0; i<colCount; ++i) labels[i] = sqlFieldName(sr); /* Get flags that say which fields we've reported. */ bool *flags; AllocArray(flags, colCount); char **row; while ((row = sqlNextRow(sr)) != NULL) { int i; for (i=0; i<colCount; ++i) { char *field = row[i]; if (field != NULL && field[0] != 0) { if (regexec(&re, row[i], 0, NULL, 0) == 0) { if (!flags[i]) { flags[i] = TRUE; fprintf(f, "%s\t%s\t%s\n", table->name, labels[i], row[i]); } } } } } sqlFreeResult(&sr); freez(&flags); freez(&labels); } } carefulClose(&f); }
struct slName *spKeywords(struct sqlConnection *conn, char *acc)
/* Return list of keywords for accession, joining accToKeyword with the
 * keyword table to get the keyword text.
 *
 * The query is built with sqlSafef so that quote characters in acc
 * cannot break or alter the SQL statement. */
{
char query[256];
sqlSafef(query, sizeof(query),
    "select keyword.val from accToKeyword,keyword "
    "where accToKeyword.acc = '%s' "
    "and accToKeyword.keyword = keyword.id"
    , acc);
return sqlQuickList(conn, query);
}
struct slName *spBinomialNames(struct sqlConnection *conn, char *acc)
/* Return list of scientific names of organisms associated with accession,
 * taken from the binomial column after joining accToTaxon with taxon.
 *
 * The query is built with sqlSafef so that quote characters in acc
 * cannot break or alter the SQL statement. */
{
char query[256];
sqlSafef(query, sizeof(query),
    "select binomial from accToTaxon,taxon "
    "where accToTaxon.acc = '%s' and accToTaxon.taxon = taxon.id"
    , acc);
return sqlQuickList(conn, query);
}
struct slName *spProteinEvidence(struct sqlConnection *conn, char *acc)
/* Get list of evidence that protein exists for accession.  There will be
 * at least one (per the original contract; this function itself does not
 * check).  Values come from proteinEvidenceType.val.
 *
 * The query is built with sqlSafef so that quote characters in acc
 * cannot break or alter the SQL statement. */
{
char query[256];
sqlSafef(query, sizeof(query),
    "select proteinEvidenceType.val from proteinEvidence,proteinEvidenceType "
    "where proteinEvidence.acc = '%s' "
    "and proteinEvidence.proteinEvidenceType = proteinEvidenceType.id"
    , acc);
return sqlQuickList(conn, query);
}
static void visiGeneMatchContributor(struct visiSearcher *searcher, struct sqlConnection *conn, struct slName *wordList) /* Put images from contributors in wordList into searcher. * We want the behavior to be such that if you give it two names * say "Smith Mahoney" it will weigh those that match both * names. We also want it so that if you include the initials * after the last name either with or without periods that will * set those matching the last name and initials. For * instance "Smith JJ" or "Smith J.J." or "Smith J. J." all would * match a particular John Jacob Smith, but not Francis K. Smith. * Making this a little more interesting is a case like * "smith li" which could either be two last names, or a last * name followed by initials. We would want to match both * cases. Finally, making it even more interesting, is the * case where the last name is compound, like "Van Koppen" or * "de la Cruz" and the like. Also don't forget the apostrophe * containing names like O'Shea. */ { struct slName *word; struct dyString *query = dyStringNew(0); int wordIx; for (word = wordList, wordIx=0; word != NULL; wordIx++) { struct slName *nameList, *name; int maxWordsUsed = 0; dyStringClear(query); dyStringPrintf(query, "select name from contributor where name like \""); dyStringAppend(query, word->name); dyStringAppend(query, " %\""); nameList = sqlQuickList(conn, query->string); if (nameList != NULL) { for (name = nameList; name != NULL; name = name->next) { int wordsUsed = countPartsUsedInName(name->name, word); if (wordsUsed > maxWordsUsed) maxWordsUsed = wordsUsed; } for (name = nameList; name != NULL; name = name->next) { if (countPartsUsedInName(name->name, word) == maxWordsUsed) addImagesMatchingName(searcher, conn, query, name->name, wordIx, maxWordsUsed); } while (--maxWordsUsed >= 0) word = word->next; } else word = word->next; slFreeList(&nameList); } dyStringFree(&query); }
struct slName *spRefAuthors(struct sqlConnection *conn, int refId)
/* Return the author names for reference refId, obtained by joining
 * referenceAuthors with the author table. */
{
char sql[256];
struct slName *authorList;

safef(sql, sizeof(sql),
    "select author.val from referenceAuthors,author "
    "where referenceAuthors.reference = %d "
    "and referenceAuthors.author = author.id"
    , refId);
authorList = sqlQuickList(conn, sql);
return authorList;
}
void submitToDir(struct sqlConnection *conn, struct sqlConnection *conn2, struct sqlConnection *connSp, char *outDir, char *inJax) /* Create directory full of visiGeneLoad .ra/.tab files from * jackson database connection. Creates a pair of files for * each submission set. Returns outDir. */ { struct dyString *query = dyStringNew(0); struct slName *ref, *refList = sqlQuickList(conn, NOSQLINJ "select distinct(_Refs_key) from GXD_Assay"); int refCount = 0; makeDir(outDir); for (ref = refList; ref != NULL; ref = ref->next) { char path[PATH_LEN]; char *pub=NULL; boolean skip; /* Check that it isn't on our skip list - one that we * have already in the database from a higher resolution * source. */ dyStringClear(query); sqlDyStringPrintf(query, "select title from BIB_Refs where _Refs_key = %s", ref->name); pub = sqlQuickString(conn, query->string); if (!pub) { verbose(1,"ref %s: missing title from BIB_Refs, ref skipped\n",ref->name); continue; } skip = oneSubmissionSet ? oneSubmissionSet != sqlSigned(ref->name) : FALSE; if (!skip) { safef(path, sizeof(path), "%s/%s", outDir, ref->name); submitRefToFiles(conn, conn2, connSp, ref->name, path, inJax); refCount += 1; if (testMax != 0 && refCount >= testMax) errAbort("Reached testMax %d output dirs [%s]\n", testMax, path); } freeMem(pub); } verbose(1,"refCount=%d\n",refCount); slNameFreeList(&refList); dyStringFree(&query); }
static void visiGeneMatchMultiWord(struct visiSearcher *searcher,
        struct sqlConnection *conn, struct slName *wordList,
        char *table, char *field, AdderFunc adder)
/* Helper for matches that may span more than one word.  For each starting
 * word of at least three characters, values of table.field that begin with
 * the word are fetched and the one(s) using the most words are handed to
 * adder; that many words are then consumed.  A multiple-word match is
 * therefore taken over a single-word match.  Shorter words, and words with
 * no match, are simply stepped over. */
{
struct dyString *query = dyStringNew(0);
struct slName *word = wordList;
int wordIx = 0;

while (word != NULL)
    {
    struct slName *candidates = NULL, *cand;
    int bestCount = 0;

    if (strlen(word->name) >= 3)   /* Logic could be expensive on small words */
        {
        dyStringClear(query);
        dyStringPrintf(query, "select %s from %s where %s like \"",
            field, table, field);
        dyStringAppend(query, word->name);
        dyStringAppend(query, "%\"");
        candidates = sqlQuickList(conn, query->string);
        for (cand = candidates; cand != NULL; cand = cand->next)
            {
            int used = countWordsUsedInPhrase(cand->name, word);
            if (used > bestCount)
                bestCount = used;
            }
        }

    if (bestCount <= 0)
        {
        word = word->next;
        }
    else
        {
        int skipped;
        for (cand = candidates; cand != NULL; cand = cand->next)
            {
            if (countWordsUsedInPhrase(cand->name, word) == bestCount)
                adder(searcher, conn, query, cand->name, wordIx, bestCount);
            }
        /* Step past every word that was part of the best match. */
        for (skipped = 0; skipped < bestCount; ++skipped)
            word = word->next;
        }

    slFreeList(&candidates);
    ++wordIx;
    }
dyStringFree(&query);
}
void savePsl(struct sqlConnection *conn, struct hash *accHash, char *fileName)
/* Save EST and mRNA alignments that are in hash table to file.  Writes the
 * matching rows of all_mrna first, then those of every table matching
 * 'chr%_intronEst'.  The all_est table is not written. */
{
FILE *out = mustOpen(fileName, "w");
struct slName *intronEstTables =
    sqlQuickList(conn, "NOSQLINJ show tables like 'chr%_intronEst'");
struct slName *tbl = intronEstTables;

writeMatchingPsl(conn, "all_mrna", accHash, out);
while (tbl != NULL)
    {
    writeMatchingPsl(conn, tbl->name, accHash, out);
    tbl = tbl->next;
    }
carefulClose(&out);
}
struct slName *lsSnpPdbChimeraGetSnpPdbs(struct sqlConnection *conn,
        char *snpId)
/* Return the sorted list of distinct PDB ids that lsSnpPdb maps snpId to,
 * or NULL when the lsSnpPdb table does not exist. */
{
struct slName *pdbList = NULL;

if (sqlTableExists(conn, "lsSnpPdb"))
    {
    char sql[256];
    sqlSafef(sql, sizeof(sql),
        "SELECT distinct pdbId FROM lsSnpPdb WHERE (snpId = \"%s\")", snpId);
    pdbList = sqlQuickList(conn, sql);
    slNameSort(&pdbList);
    }
return pdbList;
}
void edwRunOnIds(char *program, char *queryString)
/* edwRunOnIds - Run a edw command line program (one that takes startId endId
 * as its two parameters) for a range of ids, putting it on edwJob queue.
 * queryString is executed as given and each value it returns becomes one
 * row in the file-level runTable, with the command line
 * "<program> <id> <id>". */
{
struct sqlConnection *conn = edwConnectReadWrite();
struct slName *id, *idList = sqlQuickList(conn, queryString);
for (id = idList; id != NULL; id = id->next)
    {
    char query[512];
    sqlSafef(query, sizeof(query),
        "insert into %s (commandLine) values ('%s %s %s')",
        runTable, program, id->name, id->name);
    sqlUpdate(conn, query);
    }

/* Release the id list and the connection; previously both were leaked. */
slFreeList(&idList);
sqlDisconnect(&conn);
}
struct slName *spExtDbAcc1List(struct sqlConnection *conn, char *acc,
        char *db)
/* Get list of accessions from external database associated with this
 * swissProt entity.  The db parameter can be anything in the
 * extDb table.  Some common external databases are 'EMBL' 'PDB' 'Pfam'
 * 'Interpro'.  Values come from extDbRef.extAcc1.
 *
 * The query is built with sqlSafef so that quote characters in acc or db
 * cannot break or alter the SQL statement. */
{
char query[256];
sqlSafef(query, sizeof(query),
    "select extDbRef.extAcc1 from extDbRef,extDb "
    "where extDbRef.acc = '%s' "
    "and extDbRef.extDb = extDb.id "
    "and extDb.val = '%s'"
    , acc, db);
return sqlQuickList(conn, query);
}
struct slName *queryAltFixNames(struct sqlConnection *conn, char *table,
        char *term, char *excludeSuffix, boolean prefixOnly)
/* If table exists, return names in table that match term, otherwise NULL.
 * Names are chopped after ':' if there is one, and items that end with
 * excludeSuffix (or have it just before a ':') are excluded so the mappings
 * between _alt and _fix sequences don't sneak into the wrong category's
 * results.  With prefixOnly the term must start the name; otherwise it may
 * appear anywhere in it. */
{
if (!sqlTableExists(conn, table))
    return NULL;

char query[2048];
char *leadingWildcard = "%";
if (prefixOnly)
    leadingWildcard = "";

sqlSafef(query, sizeof(query),
    "select distinct(substring_index(name, ':', 1)) from %s "
    "where name like '%s%s%%' "
    "and name not like '%%%s' and name not like '%%%s:%%'"
    "order by name",
    table, leadingWildcard, escapeAltFixTerm(term),
    excludeSuffix, excludeSuffix);
return sqlQuickList(conn, query);
}
boolean findInKnownCanonical(struct sqlConnection *conn, char *geneSymbol,
        FILE *outF)
/* For each knownGene cluster for this geneSymbol, print output using the
 * cluster bounds as {chrom,chromStart,chromEnd}.  The return value ORs
 * together the results of printBed4FromQueryAndName over all clusters, so
 * it is FALSE when there are no clusters. */
{
char query[1024];
boolean found = FALSE;
struct slName *clusterIds, *cluster;

/* Which clusters contain a transcript carrying this symbol? */
sqlSafef(query, sizeof(query),
    "select distinct clusterId from kgXref x, knownIsoforms i "
    "where x.geneSymbol='%s' and i.transcript=x.kgId", geneSymbol);
clusterIds = sqlQuickList(conn, query);

cluster = clusterIds;
while (cluster != NULL)
    {
    /* Extent of the cluster on the canonical transcript's chromosome. */
    sqlSafef(query, sizeof(query),
        "select k.chrom, min(txStart), max(txEnd) "
        "from knownGene k, knownIsoforms i, knownCanonical c "
        "where i.clusterId=%s and i.transcript=k.name "
        "and c.clusterId=i.clusterId and k.chrom=c.chrom", cluster->name);
    found |= printBed4FromQueryAndName(conn, query, geneSymbol, outF);
    cluster = cluster->next;
    }
slFreeList(&clusterIds);
return found;
}
struct hash* searchForKeywords(struct sqlConnection* conn, char *articleTable, char *keywords) /* return hash with the articleIds that contain a given keyword in the abstract/title/authors */ { if (isEmpty(keywords)) return NULL; char query[12000]; sqlSafef(query, sizeof(query), "SELECT articleId FROM %s WHERE " "MATCH (citation, title, authors, abstract) AGAINST ('%s' IN BOOLEAN MODE)", articleTable, keywords); //printf("query %s", query); struct slName *artIds = sqlQuickList(conn, query); if (artIds==NULL || slCount(artIds)==0) return NULL; // convert list to hash struct hash *hashA = hashNew(0); struct slName *el; for (el = artIds; el != NULL; el = el->next) hashAddInt(hashA, el->name, 1); freeMem(keywords); slFreeList(artIds); return hashA; }
void gadPos(char *db, char *outFileName) /* Try to get genomic positions for GAD gene symbols from knownGene, refGene, * kgAlias and Gencode V14 in that order. */ { FILE *outF = mustOpen(outFileName, "w"); struct sqlConnection *conn = hAllocConn(db); /* loop over all gene symbols in GAD */ struct slName *geneSymbols = sqlQuickList(conn, NOSQLINJ "select distinct geneSymbol from gadAll where association='Y'"); struct slName *symbol; int kcCount = 0, rgCount = 0, kaCount = 0, gcCount = 0, missingCount = 0; for (symbol = geneSymbols; symbol != NULL; symbol = symbol->next) { if (findInKnownCanonical(conn, symbol->name, outF)) kcCount++; else if (findInRefGene(conn, symbol->name, outF)) rgCount++; else if (findInKgAlias(conn, symbol->name, outF)) kaCount++; else if (findInGencode(conn, symbol->name, outF)) gcCount++; else { verbose(2, "No result for gene symbol '%s'\n", symbol->name); missingCount++; } } verbose(1, "Found in knownCanonical: %d\n", kcCount); verbose(1, "Found in refGene: %d\n", rgCount); verbose(1, "Found in kgAlias: %d\n", kaCount); verbose(1, "Found in Gencode: %d\n", gcCount); verbose(1, "Not found: %d\n", missingCount); hFreeConn(&conn); carefulClose(&outF); }
struct slName *spGeneToAccs(struct sqlConnection *conn, char *gene,
        int taxon)
/* Get list of accessions associated with this gene.  If
 * taxon is zero then this will return all accessions, if
 * taxon is non-zero then it will restrict it to a single
 * organism with that taxon ID.
 *
 * Queries are built with sqlSafef so that quote characters in gene
 * cannot break or alter the SQL statement. */
{
char query[256];
if (taxon == 0)
    {
    sqlSafef(query, sizeof(query),
        "select acc from gene where val = '%s'", gene);
    }
else
    {
    sqlSafef(query, sizeof(query),
        "select gene.acc from gene,accToTaxon "
        "where gene.val = '%s' "
        "and gene.acc = accToTaxon.acc "
        "and accToTaxon.taxon = %d"
        , gene, taxon);
    }
return sqlQuickList(conn, query);
}
char *visiGeneHypertextGenotype(struct sqlConnection *conn, int id) /* Return genotype of organism if any in nifty hypertext format. */ { int genotypeId; struct slName *geneIdList, *geneId; char query[256]; struct dyString *html; /* Look up genotype ID. */ safef(query, sizeof(query), "select specimen.genotype from image,specimen " "where image.id=%d and image.specimen = specimen.id", id); genotypeId = sqlQuickNum(conn, query); if (genotypeId == 0) return NULL; /* Get list of genes involved. */ safef(query, sizeof(query), "select distinct allele.gene from genotypeAllele,allele " "where genotypeAllele.genotype=%d " "and genotypeAllele.allele = allele.id" , genotypeId); geneIdList = sqlQuickList(conn, query); if (geneIdList == NULL) return cloneString("wild type"); /* Loop through each gene adding information to html. */ html = dyStringNew(0); for (geneId = geneIdList; geneId != NULL; geneId = geneId->next) { char *geneName; struct slName *alleleList, *allele; int alleleCount; boolean needsSlash = FALSE; /* Get gene name. */ safef(query, sizeof(query), "select name from gene where id=%s", geneId->name); geneName = sqlQuickString(conn, query); if (geneName == NULL) internalErr(); /* Process each allele of gene. 
*/ safef(query, sizeof(query), "select allele.name from genotypeAllele,allele " "where genotypeAllele.genotype=%d " "and genotypeAllele.allele = allele.id " "and allele.gene=%s" , genotypeId, geneId->name); alleleList = sqlQuickList(conn, query); alleleCount = slCount(alleleList); for (allele = alleleList; allele != NULL; allele = allele->next) { char *simplifiedAllele = getSimplifiedAllele(geneName, allele->name); int repCount = 1, rep; if (alleleCount == 1) repCount = 2; for (rep = 0; rep < repCount; ++rep) { if (needsSlash) dyStringAppendC(html, '/'); else needsSlash = TRUE; dyStringAppend(html, geneName); dyStringPrintf(html, "<SUP>%s</SUP>", simplifiedAllele); } freeMem(simplifiedAllele); } if (geneId->next != NULL) dyStringAppendC(html, ' '); slFreeList(&alleleList); freeMem(geneName); } slFreeList(&geneIdList); return dyStringCannibalize(&html); }
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId) /* Given gene ID, try and find known gene on browser in same * species. */ { char query[256]; int taxon; char *url = NULL; char *genomeDb = NULL; /* Figure out taxon. */ safef(query, sizeof(query), "select taxon from gene where id = %d", geneId); taxon = sqlQuickNum(conn, query); genomeDb = hDbForTaxon(conn, taxon); if (genomeDb != NULL) { /* Make sure known genes track exists - we may need * to tweak this at some point for model organisms. */ safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb); if (!sqlTableExists(conn, query)) genomeDb = NULL; } /* If no db for that organism revert to human. */ if (genomeDb == NULL) genomeDb = hDefaultDb(); safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb); if (sqlTableExists(conn, query)) { struct dyString *dy = dyStringNew(0); char *knownGene = NULL; if (sqlCountColumnsInTable(conn, query) == 3) { dyStringPrintf(dy, "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId); } else { struct slName *imageList, *image; safef(query, sizeof(query), "select imageProbe.image from probe,imageProbe " "where probe.gene=%d and imageProbe.probe=probe.id", geneId); imageList = sqlQuickList(conn, query); if (imageList != NULL) { dyStringPrintf(dy, "select name from %s.knownToVisiGene ", genomeDb); dyStringAppend(dy, "where value in("); for (image = imageList; image != NULL; image = image->next) { dyStringPrintf(dy, "'%s'", image->name); if (image->next != NULL) dyStringAppendC(dy, ','); } dyStringAppend(dy, ")"); slFreeList(&imageList); } } if (dy->stringSize > 0) { knownGene = sqlQuickString(conn, dy->string); if (knownGene != NULL) { dyStringClear(dy); dyStringPrintf(dy, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none", genomeDb, knownGene); url = dyStringCannibalize(&dy); } } dyStringFree(&dy); } freez(&genomeDb); return url; }
struct slName *jaxSpecList(struct sqlConnection *conn)
/* Return every _Specimen_key value found in GXD_Specimen. */
{
struct slName *specimenKeys = sqlQuickList(conn,
    NOSQLINJ "select _Specimen_key from GXD_Specimen");
return specimenKeys;
}