static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie,
        struct hash *chromSizeHash, struct lm *lm, int itemsPerSlot,
        struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in bedGraph format.
 *
 * Lines are read from lf until end of input or until stepTypeLine() recognizes
 * the line, in which case that line is handed back with lineFileReuse().
 * Each data line must have exactly four words: chrom, start, end, value.
 *
 * chromSizeHash, when non-NULL, supplies chromosome sizes via hashIntVal();
 * when NULL every chromosome gets size BIGNUM.  An item that ends past the
 * chromosome size produces a warning; it is then dropped if clipDontDie is
 * set, otherwise noWarnAbort() is called.
 *
 * Items and sections are allocated out of lm.  For each chromosome (sorted by
 * bedGraphChromCmpName) the items are sorted with bwgBedGraphItemCmp and cut
 * into runs of at most itemsPerSlot items; one bwgSection per run is pushed
 * onto the head of *pSectionList. */
{
/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
        lineFileReuse(lf);
        break;
        }

    /* Parse out our line and make sure it has exactly 4 columns. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome, creating its record the first time it is seen. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        /* Allocated from the hash's own local memory, so hashFree() below
         * releases it together with the hash. */
        lmAllocVar(chromHash->lm, chrom);
        hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
        chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
        slAddHead(&chromList, chrom);
        }

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end (%u) line %d of %s.",
                item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
        {
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
                lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
        if (!clipDontDie)
            noWarnAbort();
        /* With clipDontDie set the offending item is simply not added. */
        }
    else
        {
        slAddHead(&chrom->itemList, item);
        }
    }
slSort(&chromList, bedGraphChromCmpName);

for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* With a slot size below one the loop below would never advance past the
     * first item, so refuse it rather than loop forever. */
    if (chrom->itemList != NULL && itemsPerSlot < 1)
        errAbort("itemsPerSlot must be at least 1, got %d", itemsPerSlot);

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
        {
        /* Find end item of this section, and start item for next section.
         * Terminate list at end item. */
        int sectionSize = 0;
        int i;
        endItem = startItem;
        for (i=0; i<itemsPerSlot; ++i)
            {
            if (nextStartItem == NULL)
                break;
            endItem = nextStartItem;
            nextStartItem = nextStartItem->next;
            ++sectionSize;
            }
        endItem->next = NULL;

        /* Fill in section and add it to section list. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = cloneString(chrom->name);
        section->start = startItem->start;
        section->end = endItem->end;
        section->type = bwgTypeBedGraph;
        section->items.bedGraphList = startItem;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
        }
    }

/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
void doTransRegCodeProbe(struct trackDb *tdb, char *item, char *codeTable, char *motifTable, char *tfToConditionTable, char *conditionTable) /* Display detailed info on a ChIP-chip probe from transRegCode experiments. */ { char query[256]; struct sqlResult *sr; char **row; int rowOffset = hOffsetPastBin(database, seqName, tdb->table); struct sqlConnection *conn = hAllocConn(database); struct transRegCodeProbe *probe = NULL; cartWebStart(cart, database, "ChIP-chip Probe Info"); sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", tdb->table, item); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) probe = transRegCodeProbeLoad(row+rowOffset); sqlFreeResult(&sr); if (probe != NULL) { struct tfData *tfList = NULL, *tf; struct hash *tfHash = newHash(0); struct transRegCode *trc; int i; /* Print basic info. */ printf("<B>Name:</B> %s<BR>\n", probe->name); printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd, NULL, TRUE, probe->name); /* Make up list of all transcriptionFactors. */ for (i=0; i<probe->tfCount; ++i) { /* Parse out factor and condition. */ char *tfName = probe->tfList[i]; char *condition = strchr(tfName, '_'); struct tfCond *cond; if (condition != NULL) *condition++ = 0; else condition = "n/a"; tf = hashFindVal(tfHash, tfName); if (tf == NULL) { AllocVar(tf); hashAddSaveName(tfHash, tfName, tf, &tf->name); slAddHead(&tfList, tf); } AllocVar(cond); cond->name = cloneString(condition); cond->binding = probe->bindVals[i]; slAddHead(&tf->conditionList, cond); } slSort(&tfList, tfDataCmpName); /* Fold in motif hits in region. 
*/ if (sqlTableExists(conn, codeTable)) { sr = hRangeQuery(conn, codeTable, probe->chrom, probe->chromStart, probe->chromEnd, "chipEvidence != 'none'", &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { trc = transRegCodeLoad(row+rowOffset); tf = hashFindVal(tfHash, trc->name); if (tf != NULL) slAddTail(&tf->trcList, trc); } sqlFreeResult(&sr); } if (tfList == NULL) printf("No significant immunoprecipitation."); else { tfBindLevelSection(tfList, conn, motifTable, tfToConditionTable); } transRegCodeProbeFree(&probe); growthConditionSection(conn, conditionTable); } printf("\n<HR>\n"); printTrackHtml(tdb); hFreeConn(&conn); }
void *startHandler(struct xap *xap, char *name, char **atts) /* Called at the start of a tag after attributes are parsed. */ { int i; struct type *type = hashFindVal(typeHash, name); struct attribute *att; struct element *el; if (type == NULL) { AllocVar(type); hashAddSaveName(typeHash, name, type, &type->name); type->elHash = hashNew(6); type->attHash = hashNew(6); } /* Zero out seenThisRound flags */ for (el = type->elements; el != NULL; el = el->next) el->seenThisRound = FALSE; for (att = type->attributes; att != NULL; att = att->next) att->seenThisRound = FALSE; for (i=0; atts[i] != NULL; i += 2) { char *name = atts[i], *val = atts[i+1]; int valLen = strlen(val); att = hashFindVal(type->attHash, name); if (att == NULL) { AllocVar(att); hashAddSaveName(type->attHash, name, att, &att->name); att->values = hashNew(16); slAddTail(&type->attributes, att); if (type->count != 0) att->isOptional = TRUE; } att->count += 1; hashStore(att->values, val); if (valLen > att->maxLen) att->maxLen = valLen; if (!att->nonInt) if (!isAllInt(val) || hasLeftPaddedZero(val)) att->nonInt = TRUE; if (!att->nonFloat) if (!isAllFloat(val)) att->nonFloat = TRUE; att->seenThisRound = TRUE; } for (att = type->attributes; att != NULL; att = att->next) { if (!att->seenThisRound) att->isOptional = TRUE; } if (xap->stackDepth > 1) { struct xapStack *st = xap->stack+1; struct type *parent = st->object; el = hashFindVal(parent->elHash, name); if (el == NULL) { AllocVar(el); hashAdd(parent->elHash, name, el); el->type = type; slAddTail(&parent->elements, el); if (parent->count != 0) el->isOptional = TRUE; } if (el->seenThisRound) el->isList = TRUE; el->seenThisRound = TRUE; } return type; }
void fixHarbisonMotifs(char *database) /* fixHarbisonMotifs - Trim motifs that have beginning or ending columns that * are degenerate.. */ { char *motifTable = "transRegCodeMotif"; char *siteTable = "transRegCode"; struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[512], **row; struct motifSize *msList = NULL, *ms; struct hash *msHash = newHash(16); boolean anyMinNotMax = FALSE; boolean anyMissingMotif = FALSE; boolean anyMotifNotFound = FALSE; struct dnaMotif *motif; /* Stream through site table collecting data about motif sizes. */ sqlSafef(query, sizeof(query), "select name,chromEnd-chromStart from %s", siteTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; int size = atoi(row[1]); ms = hashFindVal(msHash, name); if (ms == NULL) { AllocVar(ms); hashAddSaveName(msHash, name, ms, &ms->name); ms->minSize = ms->maxSize = size; slAddHead(&msList, ms); } else { if (size < ms->minSize) ms->minSize = size; if (size > ms->maxSize) ms->maxSize = size; } } sqlFreeResult(&sr); /* Go through and report if minSize != maxSize. */ for (ms = msList; ms != NULL; ms = ms->next) { if (ms->minSize != ms->maxSize) { anyMinNotMax = TRUE; warn("%s size inconsistent: min %d, max %d", ms->name, ms->minSize, ms->maxSize); } } if (!anyMinNotMax) warn("All sizes agree in %s", siteTable); /* Stream through motifs and add to msList. */ sqlSafef(query, sizeof(query), "select * from %s", motifTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { motif = dnaMotifLoad(row); ms = hashFindVal(msHash, motif->name); if (ms == NULL) { anyMissingMotif = TRUE; warn("Motif %s is in %s but not %s", motif->name, motifTable, siteTable); } else { ms->motif = motif; } } sqlFreeResult(&sr); if (!anyMissingMotif) warn("All motifs in %s are also in %s", motifTable, siteTable); /* Make sure that all items in msList have a motif. 
*/ for (ms = msList; ms != NULL; ms = ms->next) { if (ms->motif == NULL) { anyMotifNotFound = TRUE; warn("Motif %s is in %s but not %s", ms->name, siteTable, motifTable); } } if (!anyMotifNotFound) warn("All motifs in %s are also in %s", siteTable, motifTable); /* Loop through table and deal with motifs that have different number * of columns in motif and site tables. */ for (ms = msList; ms != NULL; ms = ms->next) { motif = ms->motif; if (motif != NULL && ms->minSize == ms->maxSize) { if (motif->columnCount != ms->minSize) { warn("Motif %s has %d columns in %s but %d columns in %s", ms->name, ms->minSize, siteTable, motif->columnCount, motifTable); fixMotif(motif, ms->minSize, motifTable, conn); } } } sqlDisconnect(&conn); }
void ctgFaToFa(char *ctgFa, char *ctgCoords, char *ntDir) /* ctgFaToFa - Convert from one big file with all NT contigs to one contig per file.. */ { struct lineFile *lf; char fileName[512], *line; char *ntName, *hsName; char *parts[6]; int lineSize, partCount; struct hash *uniqHash = newHash(0); FILE *f = NULL; int dotMod = 0; struct hash *ntHash = newHash(0); struct hash *hsHash = newHash(0); struct ntContig *nt; char *words[8]; printf("Loading %s\n", ctgCoords); lf = lineFileOpen(ctgCoords, TRUE); while (lineFileRow(lf, words)) { ntName = words[0]; if ((nt = hashFindVal(ntHash, ntName)) != NULL) ++nt->cloneCount; else { AllocVar(nt); hashAddSaveName(ntHash, ntName, nt, &nt->name); hashAddSaveName(hsHash, words[1], nt, &nt->hsName); nt->cloneCount = 1; } } lineFileClose(&lf); lf = lineFileOpen(ctgFa, FALSE); makeDir(ntDir); while (lineFileNext(lf, &line, &lineSize)) { if ((++dotMod&0x1ffff) == 0) { printf("."); fflush(stdout); } if (line[0] == '>') { carefulClose(&f); line[lineSize-1] = 0; partCount = chopByChar(line, '|',parts,ArraySize(parts)); if (partCount < 3) { uglyf("partCount = %d\n", partCount); errAbort("Expecting | separated header line %d of %s", lf->lineIx, lf->fileName); } ntName = parts[1]; nt = hashFindVal(ntHash, ntName); hsName = parts[2]; if (nt == NULL) { hsName = firstWordInLine(ntName); nt = hashMustFindVal(hsHash, hsName); ntName = nt->name; } if (nt->cloneCount > 1) { if (!startsWith("Hs", hsName)) errAbort("Expecting %s to start with 'Hs' line %d of %s", hsName, lf->lineIx, lf->fileName); if (hashLookup(uniqHash, ntName)) ntName = nextFakeNtName(hsName, ntName); hashAddUnique(uniqHash, ntName, NULL); if (!startsWith("NT_", ntName)) errAbort("Expecting NT_ name line %d of %s", lf->lineIx, lf->fileName); sprintf(fileName, "%s/%s.fa", ntDir, ntName); f = mustOpen(fileName, "w"); fprintf(f, ">%s.1_1\n", ntName); } } else { if (f != NULL) mustWrite(f, line, lineSize); } } printf("\n"); carefulClose(&f); lineFileClose(&lf); }
void readFinfFiles(char *gsDir) /* Read in .finf files and save info in cloneHash/cloneList. */ { struct lineFile *lf; struct clone *clone = NULL; struct endInfo *end; char fileName[512]; int i; char *words[7]; char lastClone[64]; char cloneName[64]; int gsInfoCount = 0; struct frag *frag; boolean isFin; char *s, *e; strcpy(lastClone, ""); for (i=0; i<ArraySize(gsFiles); ++i) { isFin = (i <= 0); sprintf(fileName, "%s/%s", gsDir, gsFiles[i]); printf("Reading info from %s\n", fileName); lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, words)) { if (!sameString(words[1], lastClone)) { struct clone *oldClone; strcpy(lastClone, words[1]); strcpy(cloneName, words[1]); AllocVar(clone); s = strchr(cloneName, '.'); if (s == NULL) errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName); if (strlen(s) >= sizeof(clone->version)) errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName); strcpy(clone->version, s); chopSuffix(cloneName); clone->size = atoi(words[3]); if ((oldClone = hashFindVal(cloneHash, cloneName)) != NULL) { if (isFin && clone->size == oldClone->size && sameString(clone->version, oldClone->version)) warn("Apparently benign duplication of %s line %d of %s", cloneName, lf->lineIx, lf->fileName); else warn("%s duplicated line %d of %s (size %d oldSize %d)", cloneName, lf->lineIx, lf->fileName, clone->size, oldClone->size); } hashAddSaveName(cloneHash, cloneName, clone, &clone->name); clone->isFin = isFin; slAddHead(&cloneList, clone); } frag = newFrag(words[0], lf); slAddTail(&clone->fragList, frag); ++clone->fragCount; if (!clone->isFin && !sameString(words[6], "?") && !sameString(words[6], "i") && !sameString(words[6], "w")) { char *s = strchr(words[0], '~'); char c; if (s == NULL) errAbort("Expecting ~ in fragment name line %d of %s\n", lf->lineIx, lf->fileName); ++s; AllocVar(end); end->contig = cloneString(s); subChar(s, '.', '_'); end->text = cloneString(words[6]); c = lastChar(end->text); if (!(c == 'L' || c 
== 'R')) c = '?'; end->lr = c; slAddHead(&clone->gsList, end); ++gsInfoCount; } } lineFileClose(&lf); } printf("Found %d ends in %d clones\n", gsInfoCount, slCount(cloneList)); }