struct slName *segSecSpeciesList(struct segBlock *sb, struct segComp *refComp, char sepChar)
/* Return a name list containing the species possibly followed by sepChar of all
 * components other than refComp on the block. */
{
struct slName *speciesList = NULL;
struct segComp *comp;

for (comp = sb->components; comp != NULL; comp = comp->next)
    {
    char *dot;
    if (comp == refComp)
        continue;
    /* Temporarily cut src off at the first '.' so only the species
     * prefix gets stored, then put the dot back. */
    dot = strchr(comp->src, '.');
    if (dot != NULL)
        *dot = '\0';
    slNameStore(&speciesList, comp->src);
    if (dot != NULL)
        *dot = '.';
    }
return speciesList;
}
void rqlParseVarsUsed(struct rqlParse *p, struct slName **pVarList)
/* Put variables used by self and children onto varList. */
{
struct rqlParse *kid;

/* A symbol node is a variable reference; record its name. */
if (p->op == rqlOpSymbol)
    slNameStore(pVarList, p->val.s);

/* Recurse into every child of this parse node. */
for (kid = p->children; kid != NULL; kid = kid->next)
    rqlParseVarsUsed(kid, pVarList);
}
struct slName *jsAllDb(struct joinerSet *js)
/* Get list of all databases referred to by set. */
{
struct slName *dbNameList = NULL;
struct joinerField *field;

for (field = js->fieldList; field != NULL; field = field->next)
    {
    struct slName *db;
    /* slNameStore only adds names not already on the list. */
    for (db = field->dbList; db != NULL; db = db->next)
        slNameStore(&dbNameList, db->name);
    }
return dbNameList;
}
struct slName *tdbListGetGroups(struct trackDb *tdbList) // Returns a list of groups found in the tdbList // FIXME: Should be moved to trackDbCustom and shared { struct slName *groupList = NULL; char *lastGroup = "[]"; struct trackDb *tdb = tdbList; for (;tdb!=NULL;tdb=tdb->next) { if (differentString(lastGroup,tdb->grp)) lastGroup = slNameStore(&groupList, tdb->grp); } return groupList; }
struct slName *getReplicateNames(struct fullExperiment *expList) /* Return list of all replicate names */ { struct slName *repList = NULL; struct fullExperiment *exp; for (exp = expList; exp != NULL; exp = exp->next) { struct replicate *rep; for (rep = exp->repList; rep != NULL; rep = rep->next) { char *name = rep->name; if (isEmpty(name)) name = "pooled"; slNameStore(&repList, name); } } slSort(&repList, slNameCmpStringsWithEmbeddedNumbers); return repList; }
struct slName *slNameListOfUniqueWords(char *text,boolean respectQuotes)
/* Return list of unique words found by parsing string delimited by whitespace.
 * If respectQuotes then ["Lucy and Ricky" 'Fred and Ethyl'] will yield 2 slNames no quotes */
{
struct slName *list = NULL;

/* nextWord/nextQuotedWord advance text and return NULL when exhausted. */
while (text != NULL)
    {
    char *word = respectQuotes ? nextQuotedWord(&text) : nextWord(&text);
    if (word == NULL)
        break;
    slNameStore(&list, word);   /* stores only words not already seen */
    }
slReverse(&list);   /* restore original word order */
return list;
}
static struct joinerSet *parseIdentifierSet(struct lineFile *lf, char *line, struct hash *symHash, struct dyString *dyBuf) /* Parse out one joiner record - keep going until blank line or * end of file. */ { struct joinerSet *js; struct joinerField *jf; char *word, *e; char *parts[3]; int partCount; /* Parse through first line - first word is name. */ word = nextWord(&line); if (word == NULL || strchr(word, '=') != NULL) errAbort("joiner without name line %d of %s\n", lf->lineIx, lf->fileName); AllocVar(js); js->name = cloneString(word); js->lineIx = lf->lineIx; while ((word = nextWord(&line)) != NULL) { char *e = strchr(word, '='); if (e != NULL) *e++ = 0; if (sameString(word, "typeOf")) { js->typeOf = cloneSpecified(lf, word, e); } else if (sameString(word, "external")) { js->external = cloneSpecified(lf, word, e); } else if (sameString(word, "fuzzy")) { js->isFuzzy = TRUE; } else if (sameString(word, "dependency")) { js->isDependency = TRUE; } else { errAbort("Unknown attribute %s line %d of %s", word, lf->lineIx, lf->fileName); } } /* Parse second line, make sure it is quoted, and save as description. */ line = nextSubbedLine(lf, symHash, dyBuf); if (line == NULL) lineFileUnexpectedEnd(lf); line = trimSpaces(line); if (line[0] != '"' || lastChar(line) != '"') errAbort("Expecting quoted line, line %d of %s\n", lf->lineIx, lf->fileName); line[strlen(line)-1] = 0; js->description = cloneString(line+1); /* Go through subsequent lines. */ while ((line = nextSubbedLine(lf, symHash, dyBuf)) != NULL) { /* Keep grabbing until we get a blank line. */ line = skipLeadingSpaces(line); if (line[0] == 0) break; /* First word in line should be database.tabe.field. */ word = nextWord(&line); partCount = chopString(word, ".", parts, ArraySize(parts)); if (partCount != 3) errAbort("Expecting database.table.field line %d of %s", lf->lineIx, lf->fileName); /* Allocate struct and save table and field. 
*/ AllocVar(jf); jf->lineIx = lf->lineIx; jf->table = cloneString(parts[1]); jf->field = cloneString(parts[2]); if (js->fieldList == NULL && !js->isFuzzy) { jf->isPrimary = TRUE; jf->unique = TRUE; jf->full = TRUE; } jf->minCheck = 1.0; slAddHead(&js->fieldList, jf); /* Database may be a comma-separated list. Parse it here. */ jf->dbList = parseDatabaseList(lf, parts[0]); /* Look for other fields in subsequent space-separated words. */ while ((word = nextWord(&line)) != NULL) { if ((e = strchr(word, '=')) != NULL) *e++ = 0; if (sameString("comma", word)) { jf->separator = cloneString(","); } else if (sameString("separator", word)) { jf->separator = cloneSpecified(lf, word, e); } else if (sameString("chopBefore", word)) { if (e == NULL) unspecifiedVar(lf, word); slNameStore(&jf->chopBefore, e); } else if (sameString("chopAfter", word)) { if (e == NULL) unspecifiedVar(lf, word); slNameStore(&jf->chopAfter, e); } else if (sameString("indexOf", word)) { jf->indexOf = TRUE; } else if (sameString("dupeOk", word)) { if (!jf->isPrimary) warn("dupeOk outsite primary key line %d of %s", lf->lineIx, lf->fileName); jf->unique = FALSE; } else if (sameString("minCheck", word)) { if (e == NULL) unspecifiedVar(lf, word); jf->minCheck = atof(e); } else if (sameString("unique", word)) { jf->unique = TRUE; } else if (sameString("full", word)) { jf->full = TRUE; } else if (sameString("splitPrefix", word)) { jf->splitPrefix = cloneSpecified(lf, word, e); } else if (sameString("splitSuffix", word)) { jf->splitSuffix = cloneSpecified(lf, word, e); } else if (sameString("exclude", word)) { if (e == NULL) unspecifiedVar(lf, word); slNameStore(&jf->exclude, e); } else { errAbort("Unrecognized attribute %s line %d of %s", word, lf->lineIx, lf->fileName); } } if (jf->indexOf && jf->separator == NULL) errAbort("indexOf without comma or separator line %d of %s", lf->lineIx, lf->fileName); if (jf->isPrimary && jf->separator != NULL) errAbort("Error line %d of %s\n" "Primary key can't be a list 
(comma or separator)." , lf->lineIx, lf->fileName); } slReverse(&js->fieldList); return js; }
void bedListExpRecordAverage(struct bed **pBedList, struct expRecord **pERList, int extrasIndex)
/* This is a mildy complicated function to make the details page have the */
/* same data as the track when the UI option "Tissue averages" is selected. */
/* This is done by hacking the bed and expRecord lists in place and keeping */
/* the original code for the most part. */
/* Summary: groups the N expRecords by the value of extras[extrasIndex] */
/* (M unique values), replaces *pERList with one record per group, and */
/* replaces each bed's expScores/expIds with per-group averages. */
{
struct bed *bed = NULL;
struct expRecord *er, *newERList = NULL;
struct slName *extras = NULL, *oneSlName;
int M, N, i, columns;	/* M = number of groups, N = original record count */
int *mapping;

/* Quietly do nothing on NULL/empty lists or out-of-range extras column. */
if (!pBedList || !pERList || !*pBedList || !*pERList)
    return;
er = *pERList;
if ((extrasIndex < 0) || (extrasIndex >= er->numExtras))
    return;

/* Build up a unique list of words from the specific "extras" column. */
for (er = *pERList; er != NULL; er = er->next)
    slNameStore(&extras, er->extras[extrasIndex]);
slReverse(&extras);
M = slCount(extras);
N = slCount(*pERList);
columns = N + 1;

/* M rows, reserve first column for counts. */
mapping = needMem(sizeof(int) * M * columns);

/* Create the mapping array: */
/* each row corresponds to one of the groupings. The first column is the number of */
/* things in the original list in the group (k things), and the next k elements on */
/* that row are indeces. */
for (er = *pERList, i = 0; er != NULL && i < N; er = er->next, i++)
    {
    int ix = slNameFindIx(extras, er->extras[extrasIndex]) * columns;
    /* Bump the row's count cell (mapping[ix]) and append er->id just after it. */
    /* NOTE(review): assumes er->id is a valid index into bed->expScores — TODO confirm. */
    mapping[ix + ++mapping[ix]] = er->id;
    }

/* Make a new expRecord list. */
for (oneSlName = extras, i = 0; oneSlName != NULL && i < M; oneSlName = oneSlName->next, i++)
    {
    struct expRecord *newER = basicExpRecord(oneSlName->name, i, extrasIndex);
    slAddHead(&newERList, newER);
    }
slReverse(&newERList);
expRecordFreeList(pERList);
*pERList = newERList;

/* Go through each bed and change it. */
for (bed = *pBedList; bed != NULL; bed = bed->next)
    {
    float *newExpScores = needMem(sizeof(float) * M);
    int *newExpIds = needMem(sizeof(int) * M);
    /* Calculate averages. */
    /* Every group's name came from at least one record, so size >= 1 */
    /* and the division below cannot be by zero. */
    for (i = 0; i < M; i++)
	{
	int ix = i * columns;
	int size = mapping[ix];	/* number of members in this group */
	int j;
	double sum = 0;
	for (j = 1; j < size + 1; j++)
	    sum += (double)bed->expScores[mapping[ix + j]];
	newExpScores[i] = (float)(sum/size);
	newExpIds[i] = i;
	}
    /* Swap the freshly averaged arrays into the bed in place. */
    bed->expCount = M;
    freeMem(bed->expIds);
    bed->expIds = newExpIds;
    freeMem(bed->expScores);
    bed->expScores = newExpScores;
    }

/* Free stuff. */
slNameFreeList(&extras);
freez(&mapping);
}
void hgFlyBase(char *database, char *genesFile)
/* hgFlyBase - Parse FlyBase genes.txt file and turn it into a couple of
 * tables.  Records are streamed line by line; a '#' line ends a record.
 * Writes tab-separated files for each output table, then optionally
 * loads them into the database.
 * NOTE(review): tabDir, doTranscript and doLoad are presumably file-level
 * globals/command-line options — confirm against the rest of the file. */
{
/* Output table names. */
char *tGene = "fbGene";
char *tSynonym = "fbSynonym";
char *tAllele = "fbAllele";
char *tRef = "fbRef";
char *tRole = "fbRole";
char *tPhenotype = "fbPhenotype";
char *tTranscript = "fbTranscript";
char *tGo = "fbGo";
char *tUniProt = "fbUniProt";
/* One tab file per output table. */
FILE *fGene = hgCreateTabFile(tabDir, tGene);
FILE *fSynonym = hgCreateTabFile(tabDir, tSynonym);
FILE *fAllele = hgCreateTabFile(tabDir, tAllele);
FILE *fRef = hgCreateTabFile(tabDir, tRef);
FILE *fRole = hgCreateTabFile(tabDir, tRole);
FILE *fPhenotype = hgCreateTabFile(tabDir, tPhenotype);
FILE *fTranscript = NULL;	/* only opened when doTranscript is set */
FILE *fGo = hgCreateTabFile(tabDir, tGo);
FILE *fUniProt = hgCreateTabFile(tabDir, tUniProt);
struct lineFile *lf = lineFileOpen(genesFile, TRUE);
struct hash *refHash = newHash(19);	/* reference text -> struct ref */
int nextRefId = 0;	/* id 0 is reserved for FlyBase itself below */
int nextAlleleId = 0;
char *line, sub, type, *rest, *s;
/* Per-record state, reset when a '#' end-of-record line is seen. */
char *geneSym = NULL, *geneName = NULL, *geneId = NULL;
int recordCount = 0;
struct slName *synList = NULL, *syn;
int curAllele = 0, curRef = 0;
struct ref *ref = NULL;
struct sqlConnection *conn;
struct hash *goUniqHash = newHash(18);	/* dedupes geneId.goId pairs */

/* Make table from flybase genes to BGDP transcripts. */
if (doTranscript)
    {
    fTranscript = hgCreateTabFile(tabDir, tTranscript);
    getAllSplices(database, fTranscript);
    }

/* Make dummy reference for flybase itself. */
fprintf(fRef, "0\tFlyBase\n");

/* Loop through parsing and writing tab files.
 * Each input line is: <sub-char><type-char> <text>, e.g. "*z FBgn...". */
while (lineFileNext(lf, &line, NULL))
    {
    sub = line[0];
    if (sub == '#')
        {
	/* End of record. */
	++recordCount;
	if (geneId == NULL)
	    errAbort("Record without *z line ending line %d of %s",
	    	lf->lineIx, lf->fileName);

	/* Write out synonyms. */
	s = naForNull(geneSym);
	geneSym = ungreek(s);
	/* NOTE(review): if geneSym was NULL, s is naForNull's "n/a" and this
	 * freeMem may free a non-heap string — the geneName branch below
	 * guards against exactly that case; confirm naForNull's contract. */
	freeMem(s);
	s = naForNull(geneName);
	geneName = ungreek(s);
	if (! sameString(s, "n/a"))
	    freeMem(s);
	if (geneSym != NULL && !sameString(geneSym, "n/a"))
	    slNameStore(&synList, geneSym);
	if (geneName != NULL && !sameString(geneName, "n/a"))
	    slNameStore(&synList, geneName);
	for (syn = synList; syn != NULL; syn = syn->next)
	    {
	    s = ungreek(syn->name);
	    fprintf(fSynonym, "%s\t%s\n", geneId, s);
	    freeMem(s);
	    }

	/* Write out gene record. */
	fprintf(fGene, "%s\t%s\t%s\n", geneId, geneSym, geneName);

	/* Clean up per-record state before the next record. */
	freez(&geneSym);
	freez(&geneName);
	freez(&geneId);
	slFreeList(&synList);
	ref = NULL;
	curRef = curAllele = 0;
	continue;
	}
    else if (sub == 0)
        errAbort("blank line %d of %s, not allowed in gene.txt",
		lf->lineIx, lf->fileName);
    else if (isalnum(sub))
        errAbort("line %d of %s begins with %c, not allowed",
		lf->lineIx, lf->fileName, sub);
    type = line[1];
    rest = trimSpaces(line+2);
    if (sub == '*' && type == 'a')	/* gene symbol */
        geneSym = cloneString(rest);
    else if (sub == '*' && type == 'e')	/* gene name */
        geneName = cloneString(rest);
    else if (sub == '*' && type == 'z')	/* FlyBase gene ID */
        {
	geneId = cloneString(rest);
	if (!startsWith("FBgn", geneId))
	    errAbort("Bad FlyBase gene ID %s line %d of %s", geneId,
	    	lf->lineIx, lf->fileName);
	}
    else if (type == 'i' && (sub == '*' || sub == '$'))	/* synonym */
        {
	if (strlen(rest) > 2)	/* Avoid short useless ones. */
	    slNameStore(&synList, rest);
	}
    else if (sub == '*' && type == 'A')	/* allele */
        {
	if (geneId == NULL)
	    errAbort("Allele before geneId line %d of %s",
	    	lf->lineIx, lf->fileName);
	curAllele = ++nextAlleleId;
	fprintf(fAllele, "%d\t%s\t%s\n", curAllele, geneId, rest);
	/* Generic allele-class words aren't useful as synonyms. */
	if (!sameString(rest, "classical") && !sameString(rest, "in vitro") && !sameString(rest, "wild-type") )
	    {
	    slNameStore(&synList, rest);
	    }
	}
    else if (sub == '*' && type == 'm')	/* protein cross-reference */
        {
	if (geneId == NULL)
	    errAbort("*m protein ID before geneId line %d of %s",
		lf->lineIx, lf->fileName);
	if (startsWith("UniProt", rest))
	    {
	    char *ptr = strchr(rest, ':');
	    if (ptr != NULL)
		ptr++;
	    else
		/* NOTE(review): "like %d" looks like a typo for "line %d"
		 * in this message (runtime string left unchanged here). */
		errAbort("Trouble parsing UniProt ID %s like %d of %s",
			rest, lf->lineIx, lf->fileName);
	    fprintf(fUniProt, "%s\t%s\n", geneId, ptr);
	    }
	}
    else if (type == 'E')	/* literature reference */
        {
	ref = hashFindVal(refHash, rest);
	if (ref == NULL)
	    {
	    /* First time this reference text is seen: assign an id. */
	    AllocVar(ref);
	    ref->id = ++nextRefId;
	    hashAdd(refHash, rest, ref);
	    subChar(rest, '\t', ' ');	/* keep output tab-safe */
	    fprintf(fRef, "%d\t%s\n", ref->id, rest);
	    }
	curRef = ref->id;
	}
    else if ((type == 'k' || type == 'r' || type == 'p') && sub != '@')
        {
	/* Role ('r') or phenotype text, possibly spanning several lines. */
	FILE *f = (type == 'r' ? fRole : fPhenotype);
	struct dyString *dy = suckSameLines(lf, line);
	subChar(dy->string, '\t', ' ');
	if (geneId == NULL)
	    errAbort("Expecting *z in record before line %d of %s",
	    	lf->lineIx, lf->fileName);
	fprintf(f, "%s\t%d\t%d\t%s\n", geneId, curAllele, curRef, dy->string);
	dyStringFree(&dy);
	}
    else if (type == 'd' || type == 'f' || type == 'F')	/* GO terms */
        {
	FILE *f = fGo;
	/* Map line type to a GO aspect letter.
	 * NOTE(review): assumes 'd'->P(rocess), 'f'->C(omponent),
	 * 'F'->F(unction) — verify against the FlyBase genes.txt spec. */
	char aspect = (type == 'd') ? 'P' : (type == 'f') ? 'C' : 'F';
	char *goId = rest;
	char *p = strstr(goId, " ; ");
	char assoc[128];
	if (p == NULL)
	    continue;	/* no " ; " separator: skip silently */
	else
	    goId = firstWordInLine(p + 3);
	/* Dedupe on the geneId.goId pair. */
	safef(assoc, sizeof(assoc), "%s.%s", geneId, goId);
	if (hashLookup(goUniqHash, assoc) == NULL)
	    {
	    hashAddInt(goUniqHash, assoc, 1);
	    fprintf(f, "%s\t%s\t%c\n", geneId, goId, aspect);
	    }
	}
    }
printf("Processed %d records in %d lines\n", recordCount, lf->lineIx);
lineFileClose(&lf);

/* Recreate the SQL tables, then optionally load and remove tab files. */
conn = sqlConnect(database);
remakeTables(conn);
if (doLoad)
    {
    printf("Loading %s\n", tGene);
    hgLoadTabFile(conn, tabDir, tGene, &fGene);
    if (doTranscript)
	{
	printf("Loading %s\n", tTranscript);
	hgLoadTabFile(conn, tabDir, tTranscript, &fTranscript);
	}
    printf("Loading %s\n", tSynonym);
    hgLoadTabFile(conn, tabDir, tSynonym, &fSynonym);
    printf("Loading %s\n", tAllele);
    hgLoadTabFile(conn, tabDir, tAllele, &fAllele);
    printf("Loading %s\n", tRef);
    hgLoadTabFile(conn, tabDir, tRef, &fRef);
    printf("Loading %s\n", tRole);
    hgLoadTabFile(conn, tabDir, tRole, &fRole);
    printf("Loading %s\n", tPhenotype);
    hgLoadTabFile(conn, tabDir, tPhenotype, &fPhenotype);
    printf("Loading %s\n", tGo);
    hgLoadTabFile(conn, tabDir, tGo, &fGo);
    printf("Loading %s\n", tUniProt);
    hgLoadTabFile(conn, tabDir, tUniProt, &fUniProt);
    /* Remove tab files only after a successful load. */
    hgRemoveTabFile(tabDir, tGene);
    if (doTranscript)
	hgRemoveTabFile(tabDir, tTranscript);
    hgRemoveTabFile(tabDir, tSynonym);
    hgRemoveTabFile(tabDir, tAllele);
    hgRemoveTabFile(tabDir, tRef);
    hgRemoveTabFile(tabDir, tRole);
    hgRemoveTabFile(tabDir, tPhenotype);
    hgRemoveTabFile(tabDir, tGo);
    hgRemoveTabFile(tabDir, tUniProt);
    }
}