コード例 #1
0
ファイル: hgYeastRegCode.c プロジェクト: blumroy/kentUtils
void makeMotifs(char *inFile, struct hash *tfHash, char *outFile)
/* Parse input motifs and save them to outFile in dnaMotif format.
 *   inFile  - text file containing one or more records, each introduced
 *             by a line containing "Probability matrix for".
 *   tfHash  - hash keyed by motif name; the val of each matching element
 *             is overwritten with the parsed motif.
 *   outFile - file to create; one dnaMotifTabOut record per motif.
 * Aborts if a record is malformed or names a motif that is not in tfHash. */
{
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");
struct hashEl *hel;

for (;;)
    {
    char *line;
    char *words[256], *word;
    int wordCount;
    struct dnaMotif *motif;

    /* Running out of "Probability matrix for" lines is the normal way
     * to finish. */
    if (!lineFileSkipTo(lf, "Probability matrix for"))
        break;

    /* The next line is the column header: "#" followed by one word per
     * motif column. */
    lineFileNeedNext(lf, &line, NULL);
    wordCount = chopLine(line, words);
    if (wordCount >= ArraySize(words))
        errAbort("Line %d of %s is too long\n", lf->lineIx, lf->fileName);
    /* A blank header line yields no words at all, in which case words[0]
     * was never filled in and must not be read.  badFormat is presumed
     * to abort, as in the original flow. */
    if (wordCount < 1 || !sameString(words[0], "#"))
        badFormat(lf);

    AllocVar(motif);
    motif->columnCount = wordCount-1;
    /* Rows are requested in the order A, C, T, G. */
    readBaseProbs(lf, words, "#A", &motif->aProb, motif->columnCount);
    readBaseProbs(lf, words, "#C", &motif->cProb, motif->columnCount);
    readBaseProbs(lf, words, "#T", &motif->tProb, motif->columnCount);
    readBaseProbs(lf, words, "#G", &motif->gProb, motif->columnCount);

    /* Find the "Source:" line, push it back, and read it again so that
     * its words can be picked apart; the second word is the motif name. */
    if (!lineFileSkipTo(lf, "Source:"))
        lineFileUnexpectedEnd(lf);
    lineFileReuse(lf);
    lineFileNeedNext(lf, &line, NULL);
    word = nextWord(&line);
    word = nextWord(&line);
    if (word == NULL)
        errAbort("Short Source: line %d of %s", lf->lineIx, lf->fileName);
    motif->name = cloneString(word);

    /* Every motif must already be known to the caller; attach the parsed
     * motif to its hash element and write it out. */
    hel = hashLookup(tfHash, motif->name);
    if (hel == NULL)
        errAbort("%s in %s but not GFFs", motif->name, lf->fileName);
    hel->val = motif;
    dnaMotifTabOut(motif, f);
    }
carefulClose(&f);
lineFileClose(&lf);
}
コード例 #2
0
void iriToDnaMotif(char *inName, char *outName)
/* iriToDnaMotif - Convert improbRunInfo to dnaMotif.
 *   inName  - file loaded with improbRunInfoLoadAll.
 *   outName - file to create; one dnaMotifTabOut record per input item. */
{
FILE *f = mustOpen(outName, "w");
/* A single static motif is reused as a view onto each improbRunInfo; it
 * borrows the name and probability arrays rather than copying them. */
static struct dnaMotif motif;
struct improbRunInfo *iriList = improbRunInfoLoadAll(inName);
struct improbRunInfo *iri;

for (iri = iriList; iri != NULL; iri = iri->next)
    {
    motif.name = iri->name;
    motif.columnCount = iri->columnCount;
    motif.aProb = iri->aProb;
    motif.cProb = iri->cProb;
    motif.gProb = iri->gProb;
    motif.tProb = iri->tProb;
    dnaMotifTabOut(&motif, f);
    }
/* Close the output explicitly so buffered records are flushed here
 * rather than whenever the process happens to exit. */
carefulClose(&f);
}
コード例 #3
0
void emblMatrixToMotif(char *inName, char *outName)
/* emblMatrixToMotif - Convert transfac matrix in EMBL format to dnaMotif.
 *   inName  - EMBL format input, read record by record with emblRecord.
 *   outName - file to create; one dnaMotifTabOut record per accepted
 *             input record.
 * A record is converted only if it has both an "AC" and a "P0" field and
 * orgFits() accepts it; other records are skipped silently. */
{
struct hash *hash = NULL;
struct lineFile *lf = emblOpen(inName, NULL);
FILE *f = mustOpen(outName, "w");
struct dnaMotif *motif;

while ((hash = emblRecord(lf)) != NULL)
    {
    char *ac = hashFindVal(hash, "AC");
    char *po = hashFindVal(hash, "P0");
    if (ac != NULL && po != NULL && orgFits(hash))
        {
        motif = emblToMotif(ac, hash);
        dnaMotifTabOut(motif, f);
        dnaMotifFree(&motif);
        }
    /* NOTE(review): the per-record hash is not freed here; confirm the
     * ownership contract of emblRecord before adding a free. */
    }
/* Release the output and input handles, matching how the other
 * converters in this code finish up. */
carefulClose(&f);
lineFileClose(&lf);
}
コード例 #4
0
struct hash *loadMotifWeights(struct sqlConnection *conn, char *fileName, 
	char *table)
/* Load in XML weight motif file and save it in tab-separated format
 * and in hash keyed by motif name.
 *   conn     - database connection in which the table is (re)created.
 *   fileName - XML file read with esmMotifsLoad.
 *   table    - name of the table to create and load.
 * Returns a hash whose values are the struct dnaMotif built for each
 * motif, keyed by the name produced by fixMotifName.
 * Aborts if a position does not have exactly 4 weights or if its Num
 * falls outside 0..posCount-1. */
{
struct esmMotifs *motifs = esmMotifsLoad(fileName);
struct esmMotif *motif;
FILE *f = hgCreateTabFile(tmpDir, table);
struct dyString *dy = dyStringNew(512);
struct hash *hash = newHash(16);

for (motif = motifs->esmMotif; motif != NULL; motif = motif->next)
    {
    struct esmWeights *weights = motif->esmWeights;
    int posCount = slCount(weights->esmPosition);
    struct esmPosition *pos;
    struct dnaMotif *dm;
    char name[64];

    fixMotifName(motif->Name, name, sizeof(name));
    AllocVar(dm);
    dm->name = cloneString(name);
    dm->columnCount = posCount;
    AllocArray(dm->aProb, posCount);
    AllocArray(dm->cProb, posCount);
    AllocArray(dm->gProb, posCount);
    AllocArray(dm->tProb, posCount);
    for (pos = weights->esmPosition; pos != NULL; pos = pos->next)
        {
        char *row[5];   /* One slot more than needed so a 5th value is detected. */
        double odds[4], sumOdds = 0;
        int i;

        int ix = pos->Num;
        int rowSize = chopString(pos->Weights, ";", row, ArraySize(row));
        if (rowSize != 4)
            errAbort("Expecting 4 values for weights in position %d of Motif %s",
               pos->Num, motif->Name);
        /* ix indexes the per-column arrays, so it must be rejected when
         * negative as well as when too large. */
        if (ix < 0 || ix >= posCount)
            errAbort("Num %d out of range in Motif %s", ix, motif->Name);
        /* Exponentiate each of the four weights (each must come from its
         * own field, row[i]) and normalize so that the four probabilities
         * sum to 1. */
        for (i=0; i<4; ++i)
            {
            odds[i] = exp(atof(row[i]));
            sumOdds += odds[i];
            }
        /* Weights are taken to be in A, C, G, T order. */
        dm->aProb[ix] = odds[0]/sumOdds;
        dm->cProb[ix] = odds[1]/sumOdds;
        dm->gProb[ix] = odds[2]/sumOdds;
        dm->tProb[ix] = odds[3]/sumOdds;
        }
    dnaMotifTabOut(dm, f);
    hashAdd(hash, dm->name, dm);
    }

/* Recreate the table, bulk load the tab file written above, then remove
 * the temporary file. */
dyStringPrintf(dy,
"CREATE TABLE %s (\n"
"    name varchar(16) not null,	# Motif name.\n"
"    columnCount int not null,	# Count of columns in motif.\n"
"    aProb longblob not null,	# Probability of A's in each column.\n"
"    cProb longblob not null,	# Probability of C's in each column.\n"
"    gProb longblob not null,	# Probability of G's in each column.\n"
"    tProb longblob not null,	# Probability of T's in each column.\n"
"              #Indices\n"
"    PRIMARY KEY(name)\n"
")\n", table);
sqlRemakeTable(conn, table, dy->string);
hgLoadTabFile(conn, tmpDir, table, &f);
hgRemoveTabFile(tmpDir, table);
verbose(1, "Processed %d motifs into %s\n", slCount(motifs->esmMotif), table);
return hash;
}