コード例 #1
0
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie, 
	struct hash *chromSizeHash, struct lm *lm, 
	int itemsPerSlot, struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in bedGraph format.
 *   lf - input.  Reading stops at end of input or at the first line for which
 *        stepTypeLine() is true; that line is pushed back with lineFileReuse().
 *   clipDontDie - if FALSE an item ending past the chromosome size aborts.  If TRUE
 *        such an item is warned about and left out of the output.
 *   chromSizeHash - chromosome sizes keyed by name, or NULL, in which case every
 *        chromosome is given size BIGNUM.
 *   lm - local memory pool that items and sections are allocated from.
 *   itemsPerSlot - maximum number of items put in one section.  Must be at least 1.
 *   pSectionList - each new section is pushed onto the head of this list. */
{
/* A slot size below one would make the sectioning loop below never advance. */
if (itemsPerSlot < 1)
    errAbort("itemsPerSlot must be at least 1, got %d", itemsPerSlot);

/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
        lineFileReuse(lf);
        break;
        }

    /* Parse out our line and make sure it has exactly 4 columns.  The array has
     * room for a fifth word so that a line with too many columns is detected. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome, creating its record the first time the name is seen.
     * The record lives in the hash's own memory so it goes away with the hash. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        lmAllocVar(chromHash->lm, chrom);
        hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
        chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
        slAddHead(&chromList, chrom);
        }

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end (%u) line %d of %s.", 
                item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
        {
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
                lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
        if (!clipDontDie)
            noWarnAbort();
        /* With clipDontDie set the offending item is simply not added. */
        }
    else
        {
        slAddHead(&chrom->itemList, item);
        }
    }
slSort(&chromList, bedGraphChromCmpName);

for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
        {
        /* Find end item of this section, and start item for next section.
         * Terminate list at end item. */
        int sectionSize = 0;
        int i;
        endItem = startItem;
        for (i=0; i<itemsPerSlot; ++i)
            {
            if (nextStartItem == NULL)
                break;
            endItem = nextStartItem;
            nextStartItem = nextStartItem->next;
            ++sectionSize;
            }
        endItem->next = NULL;

        /* Fill in section and add it to section list.  The chromosome name is
         * cloned because the original is freed together with chromHash below. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = cloneString(chrom->name);
        section->start = startItem->start;
        section->end = endItem->end;
        section->type = bwgTypeBedGraph;
        section->items.bedGraphList = startItem;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
        }
    }

/* Free up hash, no longer needed. Frees chromList as a side effect since chromList is in 
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
コード例 #2
0
ファイル: regMotif.c プロジェクト: bowhan/kent
void doTransRegCodeProbe(struct trackDb *tdb, char *item,
	char *codeTable, char *motifTable,
	char *tfToConditionTable, char *conditionTable)
/* Write the details page for one ChIP-chip probe of the transRegCode
 * experiments.  The probe is the first row of tdb->table whose name equals
 * item.  When it is found its name and position are printed, its factors are
 * grouped by name with one condition entry per tfList element, motif hits
 * from codeTable (if that table exists) are attached to the matching factors,
 * and the binding level and growth condition sections are emitted.  The
 * closing rule and track description are printed whether or not a probe
 * was found. */
{
char sql[256];
char **row;
struct sqlResult *result;
struct transRegCodeProbe *probe = NULL;
int binOffset = hOffsetPastBin(database, seqName, tdb->table);
struct sqlConnection *conn = hAllocConn(database);

cartWebStart(cart, database, "ChIP-chip Probe Info");

/* Look the probe up by name; only the first matching row is loaded. */
sqlSafef(sql, sizeof(sql), "select * from %s where name = '%s'",
        tdb->table, item);
result = sqlGetResult(conn, sql);
row = sqlNextRow(result);
if (row != NULL)
    probe = transRegCodeProbeLoad(row + binOffset);
sqlFreeResult(&result);

if (probe != NULL)
    {
    struct tfData *factorList = NULL, *factor;
    struct hash *factorHash = newHash(0);
    int ix;

    /* Name and genomic position of the probe. */
    printf("<B>Name:</B> %s<BR>\n", probe->name);
    printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd,
            NULL, TRUE, probe->name);

    /* Each tfList entry is split at its first underscore: the part before is
     * the factor name, the part after is the condition.  Entries without an
     * underscore get the condition "n/a". */
    for (ix = 0; ix < probe->tfCount; ++ix)
        {
        char *factorName = probe->tfList[ix];
        char *underscore = strchr(factorName, '_');
        char *conditionName = "n/a";
        struct tfCond *cond;

        if (underscore != NULL)
            {
            *underscore = 0;    /* Cuts factorName off in place. */
            conditionName = underscore + 1;
            }

        factor = hashFindVal(factorHash, factorName);
        if (factor == NULL)
            {
            AllocVar(factor);
            hashAddSaveName(factorHash, factorName, factor, &factor->name);
            slAddHead(&factorList, factor);
            }

        AllocVar(cond);
        cond->name = cloneString(conditionName);
        cond->binding = probe->bindVals[ix];
        slAddHead(&factor->conditionList, cond);
        }
    slSort(&factorList, tfDataCmpName);

    /* Attach motif hits overlapping the probe to the factor of the same name.
     * Hits whose name matches no factor of this probe are not attached. */
    if (sqlTableExists(conn, codeTable))
        {
        result = hRangeQuery(conn, codeTable,
                probe->chrom, probe->chromStart, probe->chromEnd,
                "chipEvidence != 'none'", &binOffset);
        for (row = sqlNextRow(result); row != NULL; row = sqlNextRow(result))
            {
            struct transRegCode *hit = transRegCodeLoad(row + binOffset);
            factor = hashFindVal(factorHash, hit->name);
            if (factor != NULL)
                slAddTail(&factor->trcList, hit);
            }
        sqlFreeResult(&result);
        }

    if (factorList != NULL)
        tfBindLevelSection(factorList, conn, motifTable, tfToConditionTable);
    else
        printf("No significant immunoprecipitation.");

    transRegCodeProbeFree(&probe);
    growthConditionSection(conn, conditionTable);
    }

printf("\n<HR>\n");
printTrackHtml(tdb);
hFreeConn(&conn);
}
コード例 #3
0
void *startHandler(struct xap *xap, char *name, char **atts)
/* Called at the start of a tag after attributes are parsed. */
{
int i;
struct type *type = hashFindVal(typeHash, name);
struct attribute *att;
struct element *el;

if (type == NULL)
    {
    AllocVar(type);
    hashAddSaveName(typeHash, name, type, &type->name);
    type->elHash = hashNew(6);
    type->attHash = hashNew(6);
    }

/* Zero out seenThisRound flags */
for (el = type->elements; el != NULL; el = el->next)
    el->seenThisRound = FALSE;
for (att = type->attributes; att != NULL; att = att->next)
    att->seenThisRound = FALSE;

for (i=0; atts[i] != NULL; i += 2)
    {
    char *name = atts[i], *val = atts[i+1];
    int valLen = strlen(val);
    att = hashFindVal(type->attHash, name);
    if (att == NULL)
        {
	AllocVar(att);
	hashAddSaveName(type->attHash, name, att, &att->name);
	att->values = hashNew(16);
	slAddTail(&type->attributes, att);
	if (type->count != 0)
	    att->isOptional = TRUE;
	}
    att->count += 1;
    hashStore(att->values, val);
    if (valLen > att->maxLen)
        att->maxLen = valLen;
    if (!att->nonInt)
	if (!isAllInt(val) || hasLeftPaddedZero(val))
	    att->nonInt = TRUE;
    if (!att->nonFloat)
	if (!isAllFloat(val))
	    att->nonFloat = TRUE;
    att->seenThisRound = TRUE;
    }
for (att = type->attributes; att != NULL; att = att->next)
    {
    if (!att->seenThisRound)
        att->isOptional = TRUE;
    }

if (xap->stackDepth > 1)
    {
    struct xapStack *st = xap->stack+1;
    struct type *parent = st->object;
    el = hashFindVal(parent->elHash, name);
    if (el == NULL)
        {
	AllocVar(el);
	hashAdd(parent->elHash, name, el);
	el->type = type;
	slAddTail(&parent->elements, el);
	if (parent->count != 0)
	    el->isOptional = TRUE;
	}
    if (el->seenThisRound)
        el->isList = TRUE;
    el->seenThisRound = TRUE;
    }
return type;
}
コード例 #4
0
ファイル: fixHarbisonMotifs.c プロジェクト: elmargb/kentUtils
void fixHarbisonMotifs(char *database)
/* fixHarbisonMotifs - Trim motifs that have beginning or ending columns that 
 * are degenerate.. */
{
char *motifTable = "transRegCodeMotif";
char *siteTable = "transRegCode";
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[512], **row;
struct motifSize *msList = NULL, *ms;
struct hash *msHash = newHash(16);
boolean anyMinNotMax = FALSE;
boolean anyMissingMotif = FALSE;
boolean anyMotifNotFound = FALSE;
struct dnaMotif *motif;

/* Stream through site table collecting data about motif sizes. */
sqlSafef(query, sizeof(query), 
	"select name,chromEnd-chromStart from %s", siteTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    int size = atoi(row[1]);
    ms = hashFindVal(msHash, name);
    if (ms == NULL)
        {
	AllocVar(ms);
	hashAddSaveName(msHash, name, ms, &ms->name);
	ms->minSize = ms->maxSize = size;
	slAddHead(&msList, ms);
	}
    else
        {
	if (size < ms->minSize)
	    ms->minSize = size;
	if (size > ms->maxSize)
	    ms->maxSize = size;
	}
    }
sqlFreeResult(&sr);

/* Go through and report if minSize != maxSize. */
for (ms = msList; ms != NULL; ms = ms->next)
    {
    if (ms->minSize != ms->maxSize)
        {
	anyMinNotMax = TRUE;
	warn("%s size inconsistent:  min %d, max %d", 
		ms->name, ms->minSize, ms->maxSize);
	}
    }
if (!anyMinNotMax)
    warn("All sizes agree in %s", siteTable);

/* Stream through motifs and add to msList. */
sqlSafef(query, sizeof(query), "select * from %s", motifTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    motif = dnaMotifLoad(row);
    ms = hashFindVal(msHash, motif->name);
    if (ms == NULL)
        {
	anyMissingMotif = TRUE;
	warn("Motif %s is in %s but not %s", 
		motif->name, motifTable, siteTable);
	}
    else
        {
	ms->motif = motif;
	}
    }
sqlFreeResult(&sr);
if (!anyMissingMotif)
    warn("All motifs in %s are also in %s", motifTable, siteTable);

/* Make sure that all items in msList have a motif. */
for (ms = msList; ms != NULL; ms = ms->next)
    {
    if (ms->motif == NULL)
        {
	anyMotifNotFound = TRUE;
	warn("Motif %s is in %s but not %s",
		ms->name, siteTable, motifTable);
	}
    }
if (!anyMotifNotFound)
    warn("All motifs in %s are also in %s", siteTable, motifTable);
    
/* Loop through table and deal with motifs that have different number
 * of columns in motif and site tables. */
for (ms = msList; ms != NULL; ms = ms->next)
    {
    motif = ms->motif;
    if (motif != NULL && ms->minSize == ms->maxSize)
        {
	if (motif->columnCount != ms->minSize)
	    {
	    warn("Motif %s has %d columns in %s but %d columns in %s",
	    	ms->name, ms->minSize, siteTable, 
		motif->columnCount, motifTable);
	    fixMotif(motif, ms->minSize, motifTable, conn);
	    }
	}
    }

sqlDisconnect(&conn);
}
コード例 #5
0
ファイル: ctgFaToFa.c プロジェクト: apmagalhaes/kentUtils
void ctgFaToFa(char *ctgFa, char *ctgCoords, char *ntDir)
/* ctgFaToFa - Convert from one big file with all NT contigs to one contig per file.
 *   ctgFa - input fasta file whose header lines are '|' separated, with the
 *           NT name in the second field and the Hs name in the third.
 *   ctgCoords - whitespace separated table; column one is the NT name and
 *           column two the Hs name.  The number of rows per NT name is
 *           recorded as that contig's clone count.
 *   ntDir - output directory, created with makeDir().
 * Only contigs with a clone count above one get an output file
 * (ntDir/<name>.fa); sequence lines of other contigs are skipped. */
{
struct lineFile *lf;
char fileName[512], *line;
char *ntName, *hsName;
char *parts[6];
int lineSize, partCount;
struct hash *uniqHash = newHash(0);
FILE *f = NULL;
int dotMod = 0;
struct hash *ntHash = newHash(0);
struct hash *hsHash = newHash(0);
struct ntContig *nt;
char *words[8];

/* Count rows per NT name; the contig is also indexed by its Hs name. */
printf("Loading %s\n", ctgCoords);
lf = lineFileOpen(ctgCoords, TRUE);
while (lineFileRow(lf, words))
    {
    ntName = words[0];
    if ((nt = hashFindVal(ntHash, ntName)) != NULL)
        ++nt->cloneCount;
    else
        {
        AllocVar(nt);
        hashAddSaveName(ntHash, ntName, nt, &nt->name);
        hashAddSaveName(hsHash, words[1], nt, &nt->hsName);
        nt->cloneCount = 1;
        }
    }
lineFileClose(&lf);


lf = lineFileOpen(ctgFa, FALSE);
makeDir(ntDir);
while (lineFileNext(lf, &line, &lineSize))
    {
    /* Progress dot every 0x20000 input lines. */
    if ((++dotMod&0x1ffff) == 0)
        {
        printf(".");
        fflush(stdout);
        }
    if (line[0] == '>')
        {
        /* New record: finish the previous output file, if any. */
        carefulClose(&f);
        line[lineSize-1] = 0;   /* Replace last character of line with terminator. */
        partCount = chopByChar(line, '|',parts,ArraySize(parts));
        if (partCount < 3)
            {
            uglyf("partCount = %d\n", partCount);
            errAbort("Expecting | separated header line %d of %s", lf->lineIx, lf->fileName); 
            }
        ntName = parts[1];
        nt = hashFindVal(ntHash, ntName);
        hsName = parts[2];
        if (nt == NULL)
            {
            /* Second field is not a known NT name: treat its first word as
             * an Hs name, which then must be known. */
            hsName = firstWordInLine(ntName);
            nt = hashMustFindVal(hsHash, hsName);
            ntName = nt->name;
            }
        if (nt->cloneCount > 1)
            {
            int pathLen;
            if (!startsWith("Hs", hsName))
                errAbort("Expecting %s to start with 'Hs' line %d of %s",
                        hsName, lf->lineIx, lf->fileName);
            /* A name that was already written gets a made up replacement. */
            if (hashLookup(uniqHash, ntName))
                ntName = nextFakeNtName(hsName, ntName);
            hashAddUnique(uniqHash, ntName, NULL);
            if (!startsWith("NT_", ntName))
                errAbort("Expecting NT_ name line %d of %s", lf->lineIx, lf->fileName); 
            /* Both path components come from outside the program, so the
             * name is built with a bounded write and truncation is fatal. */
            pathLen = snprintf(fileName, sizeof(fileName), "%s/%s.fa", ntDir, ntName);
            if (pathLen < 0 || pathLen >= (int)sizeof(fileName))
                errAbort("Output file name %s/%s.fa is too long line %d of %s",
                        ntDir, ntName, lf->lineIx, lf->fileName);
            f = mustOpen(fileName, "w");
            fprintf(f, ">%s.1_1\n", ntName);
            }
        }
    else
        {
        /* Sequence line: copied through only while an output file is open. */
        if (f != NULL)
            mustWrite(f, line, lineSize);
        }
    }
printf("\n");
carefulClose(&f);
lineFileClose(&lf);
}
コード例 #6
0
void readFinfFiles(char *gsDir)
/* Read in .finf files and save info in cloneHash/cloneList.
 *   gsDir - directory holding the files named in gsFiles.
 * Each row is expected to have at least seven columns; the ones used here are
 * the fragment name (1st), the clone name with ".version" suffix (2nd), the
 * clone size (4th) and the end information (7th).  A new clone record is
 * started whenever the clone name differs from that of the previous row.
 * Clones from the first file in gsFiles are marked as finished; for all other
 * clones an end record is kept for each fragment whose 7th column is not
 * "?", "i" or "w". */
{
struct lineFile *lf;
struct clone *clone = NULL;
struct endInfo *end;
char fileName[512];
int i;
char *words[7];
char lastClone[64];
char cloneName[64];
int gsInfoCount = 0;
struct frag *frag;
boolean isFin;
char *s;

strcpy(lastClone, "");
for (i=0; i<ArraySize(gsFiles); ++i)
    {
    int pathLen;
    isFin = (i <= 0);       /* Only the first file holds finished clones. */
    pathLen = snprintf(fileName, sizeof(fileName), "%s/%s", gsDir, gsFiles[i]);
    if (pathLen < 0 || pathLen >= (int)sizeof(fileName))
        errAbort("File name %s/%s is too long", gsDir, gsFiles[i]);
    printf("Reading info from %s\n", fileName);
    lf = lineFileOpen(fileName, TRUE);
    while (lineFileRow(lf, words))
        {
        if (!sameString(words[1], lastClone))
            {
            struct clone *oldClone;
            size_t nameLen = strlen(words[1]);
            /* The name comes straight from the file, so make sure it fits
             * the fixed size buffers before copying it. */
            if (nameLen >= sizeof(lastClone) || nameLen >= sizeof(cloneName))
                errAbort("Clone name too long line %d of %s\n", lf->lineIx, lf->fileName);
            strcpy(lastClone, words[1]);
            strcpy(cloneName, words[1]);
            AllocVar(clone);
            /* Split "name.version": the version keeps its leading dot. */
            s = strchr(cloneName, '.');
            if (s == NULL)
                errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName);
            if (strlen(s) >= sizeof(clone->version))
                errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName);
            strcpy(clone->version, s);
            chopSuffix(cloneName);
            clone->size = atoi(words[3]);
            /* Duplicates are reported but the new record is still added. */
            if ((oldClone = hashFindVal(cloneHash, cloneName)) != NULL)
                {
                if (isFin && clone->size == oldClone->size && sameString(clone->version, oldClone->version))
                    warn("Apparently benign duplication of %s line %d of %s", cloneName, lf->lineIx, lf->fileName);
                else
                    warn("%s duplicated line %d of %s (size %d oldSize %d)", cloneName, lf->lineIx, lf->fileName,
                        clone->size, oldClone->size);
                }
            hashAddSaveName(cloneHash, cloneName, clone, &clone->name);
            clone->isFin = isFin;
            slAddHead(&cloneList, clone);
            }
        frag = newFrag(words[0], lf);
        slAddTail(&clone->fragList, frag);
        ++clone->fragCount;
        if (!clone->isFin && !sameString(words[6], "?") && !sameString(words[6], "i") 
           && !sameString(words[6], "w"))
            {
            char *contig = strchr(words[0], '~');
            char c;

            if (contig == NULL)
                errAbort("Expecting ~ in fragment name line %d of %s\n", lf->lineIx, lf->fileName);
            ++contig;       /* Contig name is what follows the '~'. */
            AllocVar(end);
            end->contig = cloneString(contig);
            /* NOTE(review): the substitution below happens after the clone was
             * taken, so end->contig keeps its dots - confirm this is intended. */
            subChar(contig, '.', '_');
            end->text = cloneString(words[6]);
            /* Side is the last character of the end text when that is L or R. */
            c = lastChar(end->text);
            if (!(c == 'L' || c == 'R'))
                c = '?';
            end->lr = c;
            slAddHead(&clone->gsList, end);
            ++gsInfoCount;
            }
        }
    lineFileClose(&lf);
    }
printf("Found %d ends in %d clones\n", gsInfoCount, slCount(cloneList));
}