Exemplo n.º 1
0
static boolean raRecLoad(struct raInfoTbl *rit, unsigned srcDb, struct lineFile *raLf,
                         unsigned cdnaExtId, unsigned pepExtId)
/* Read the next ra record from raLf and register it in rit.  Returns FALSE
 * when raNextRecord has nothing more to give, TRUE after a record has been
 * processed.  For GB_REFSEQ records that carry a "prt" field, a second entry
 * is added for the protein; in that case pepExtId must be non-zero. */
{
    struct hash *rec = raNextRecord(raLf);
    char *cdnaAcc;
    int cdnaVer;

    if (rec == NULL)
        return FALSE;

    /* "acc" and "ver" are both fetched with the must-find lookup */
    cdnaAcc = hashMustFindVal(rec, "acc");
    cdnaVer = sqlSigned((char*)hashMustFindVal(rec, "ver"));
    raInfoAdd(rit, rec, cdnaAcc, cdnaVer, "siz", "fao", "fas", cdnaExtId);

    if (srcDb == GB_REFSEQ)
    {
        char *protAccVer = hashFindVal(rec, "prt");
        if (protAccVer != NULL)
        {
            char protAcc[GB_ACC_BUFSZ];
            int protVer;

            if (pepExtId == 0)
                errAbort("%s has protein %s, but no pep.fa file", cdnaAcc, protAccVer);
            /* split "acc.ver" into the accession buffer and a numeric version */
            protVer = gbSplitAccVer(protAccVer, protAcc);
            raInfoAdd(rit, rec, protAcc, protVer, "prs", "pfo", "pfs", pepExtId);
        }
    }
#ifdef DUMP_HASH_STATS
    hashPrintStats(rec, "raRec", stderr);
#endif
    hashFree(&rec);
    return TRUE;
}
Exemplo n.º 2
0
void expAdd(char *file)
/* Add rows from .ra file.  Every record is turned into an experiment; an
 * experiment whose key is not already present in the table is added, any
 * other is only reported at verbose level 2. */
{
struct hash *ra = NULL;
struct lineFile *lf = lineFileOpen(file, TRUE);
struct encodeExp *exp;
struct hash *oldExps;
char *key;

/* create hash of keys for existing experiments so we can distinguish new ones */
oldExps = expKeyHashFromTable(connExp, table);

verbose(1, "Adding experiments from file \'%s\' to table \'%s\'\n", file, table);
while ((ra = raNextRecord(lf)) != NULL)
    {
    exp = encodeExpFromRa(ra);
    key = encodeExpKey(exp);
    if (hashLookup(oldExps, key) == NULL)
        {
        verbose(2, "Adding new experiment: %s\n", key);
        encodeExpAdd(connExp, table, exp);
        }
    else
        verbose(2, "Old experiment: %s\n", key);
    /* The record is not read again in this function, so release it here
     * rather than leaking one hash per stanza. */
    hashFree(&ra);
    }
/* Release the lookup hash and the input file opened above. */
hashFree(&oldExps);
lineFileClose(&lf);
}
Exemplo n.º 3
0
Arquivo: ra.c Projeto: bh0085/kent
struct hash *raReadThreeLevels(char *fileName, char *lowKeyField, char *middleKeyField)
/* Read every ra record in fileName into a three level hash.  The top level is
 * keyed by each record's middleKeyField value; each entry there is a hash keyed
 * by the record's lowKeyField value, whose values are the record hashes.
 * Both fields must be present in every record, otherwise we errAbort.
 * Example raReadThreeLevels("cv.ra","term","type"):
 *         returns hash of 'type' hashes of 'term' hashes of every stanza in cv.ra
 * When no record was stored the top hash is passed to hashFree before being
 * returned (presumably leaving NULL - confirm against hashFree). */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *byMiddle = hashNew(0); // Not expecting that many types

for (;;)
    {
    struct hash *record = raNextRecord(lf);
    char *lowKey, *middleKey;
    struct hash *byLow;

    if (record == NULL)
        break;

    lowKey = hashFindVal(record, lowKeyField);
    if (lowKey == NULL)
        errAbort("Couldn't find key field %s line %d of %s",
                lowKeyField, lf->lineIx, lf->fileName);

    middleKey = hashFindVal(record, middleKeyField);
    if (middleKey == NULL)
        errAbort("Couldn't find middle key field %s line %d of %s",
                middleKeyField, lf->lineIx, lf->fileName);

    /* Create the middle level hash the first time its key is seen. */
    byLow = hashFindVal(byMiddle, middleKey);
    if (byLow == NULL)
        {
        byLow = hashNew(16); // could be quite a few terms per type.
        hashAdd(byMiddle, middleKey, byLow);
        }
    hashAdd(byLow, lowKey, record);
    }

lineFileClose(&lf);
if (hashNumEntries(byMiddle) == 0)
    hashFree(&byMiddle);
return byMiddle;
}
Exemplo n.º 4
0
Arquivo: ra.c Projeto: bh0085/kent
struct hash *raReadWithFilter(char *fileName, char *keyField,char *filterKey,char *filterValue)
/* Read the ra records of fileName into a hash keyed by each record's keyField
 * value (the field must exist, else errAbort); the values are the record hashes.
 * When filterKey is non-NULL, records lacking that field are dropped, and when
 * filterValue is also non-NULL, so are records whose field differs from it.
 * Example raReadWithFilter(file,"term","type","antibody"): returns hash of hashes of every term with type=antibody
 * When nothing was kept the hash is passed to hashFree before being returned
 * (presumably leaving NULL - confirm against hashFree). */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *kept = hashNew(14);

for (;;)
    {
    struct hash *record = raNextRecord(lf);
    char *key;

    if (record == NULL)
        break;

    /* The key field is checked before the filter, so it is required even
     * on records the filter would go on to reject. */
    key = hashFindVal(record, keyField);
    if (key == NULL)
        errAbort("Couldn't find key field %s line %d of %s",
                keyField, lf->lineIx, lf->fileName);

    if (filterKey != NULL)
        {
        char *actual = hashFindVal(record, filterKey);
        if (actual == NULL
            || (filterValue != NULL && differentString(filterValue,actual)))
            {
            /* filtered out: free the record and move on */
            hashFree(&record);
            continue;
            }
        }
    hashAdd(kept, key, record);
    }

lineFileClose(&lf);
if (hashNumEntries(kept) == 0)
    hashFree(&kept);
return kept;
}
Exemplo n.º 5
0
struct hash *raReadSingle(char *fileName)
/* Open fileName, fetch only its first ra record, close the file again and
 * hand back whatever raNextRecord produced for that record. */
{
struct hash *firstRecord;
struct lineFile *lf;

lf = lineFileOpen(fileName, TRUE);
firstRecord = raNextRecord(lf);
lineFileClose(&lf);
return firstRecord;
}
Exemplo n.º 6
0
struct hash *raReadAll(char *fileName, char *keyField)
/* Load every ra record of fileName into one hash.  Each record is stored
 * under its own keyField value, which has to be present (errAbort if not);
 * the stored values are the per-record hashes. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *allRecords = hashNew(0);

for (;;)
    {
    struct hash *record = raNextRecord(lf);
    char *key;

    if (record == NULL)
        break;
    key = hashFindVal(record, keyField);
    if (key == NULL)
        errAbort("Couldn't find key field %s line %d of %s",
                keyField, lf->lineIx, lf->fileName);
    hashAdd(allRecords, key, record);
    }

lineFileClose(&lf);
return allRecords;
}
Exemplo n.º 7
0
void expRestoreTable(char *file)
/* Fill empty table with experiments in .ra file with id's.
 * The table must have a row count of zero, otherwise we errAbort.  Whenever
 * the id of the next record is ahead of the running row counter, filler rows
 * with accession "DELETED" are added for the skipped ids before the record
 * itself is added. */
{
struct hash *ra = NULL;
struct lineFile *lf = lineFileOpen(file, TRUE);
struct encodeExp *exp;
int ix = 1;
int expId;
char *accession;
char *key;

verbose(1, "Restoring experiments from file \'%s\' to table \'%s\'\n", file, table);
if (sqlRowCount(connExp, sqlCheckIdentifier(table)) != 0)
    errAbort("ERROR: table for restore must exist and be empty");

while ((ra = raNextRecord(lf)) != NULL)
    {
    exp = encodeExpFromRa(ra);

    /* save accession and id as we may stomp on these for to-delete experiments.
     * It must be the accession that is saved here: it is what gets written
     * back into exp->accession below. */
    accession = cloneString(exp->accession);
    expId = exp->ix;

    key = encodeExpKey(exp);
    while (ix < expId)
        {
        /* reuse this record as a placeholder row for each missing id */
        exp->accession = "DELETED";
        exp->ix = ix;
        verbose(3, "Adding row for deleted experiment %d\n", ix);
        encodeExpAdd(connExp, table, exp);
        ix++;
        }
    /* restore accession and id */
    exp->accession = accession;
    exp->ix = expId;
    encodeExpAdd(connExp, table, exp);
    verbose(3, "Adding row for experiment %d: %s\n", ix, key);
    ix++;
    /* The record is not read again in this function, so release it here. */
    hashFree(&ra);
    }
lineFileClose(&lf);
verbose(1, "To complete restore, delete rows where accession=DELETED\n");
}
Exemplo n.º 8
0
struct hash *readRefRa(char *fileName)
/* Read in refSeq ra file and return bits we're interested
 * in in a hash full of refSeqInfos.  The hash is keyed by the "acc" field.
 * Records without "acc" are ignored silently, records without "siz" are
 * skipped with a warning.  A summary line is printed to stdout at the end. */
{
struct hash *hash = newHash(16);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *ra;
int count = 0, cdsCount = 0;

while ((ra = raNextRecord(lf)) != NULL)
    {
    char *acc = hashFindVal(ra, "acc");
    if (acc != NULL)
        {
        char *cds = hashFindVal(ra, "cds");
        char *siz = hashFindVal(ra, "siz");
        if (siz == NULL)
            warn("No size for %s, skipping", acc);
        else
            {
            struct refSeqInfo *rsi;
            AllocVar(rsi);
            hashAddSaveName(hash, acc, rsi, &rsi->acc);
            rsi->size = atoi(siz);
            if (cds != NULL)
                {
                rsi->hasCds = parseCds(cds, 0, rsi->size,
                    &rsi->cdsStart, &rsi->cdsEnd);
                if (rsi->hasCds)
                    ++cdsCount;
                }
            ++count;
            }
        }
    /* Reached on every path, including skipped records, so no record
     * hash is leaked. */
    hashFree(&ra);
    }
lineFileClose(&lf);
printf("Got %d cds of %d in %s\n", cdsCount, count, fileName);
return hash;
}
Exemplo n.º 9
0
void txReadRa(char *mrnaRa, char *refSeqRa, char *outDir)
/* txReadRa - Read ra files from genbank and parse out relevant info into some 
 * tab-separated files.
 * outDir is created with makeDir and receives cds.tab, refSeqStatus.tab,
 * mrnaSize.tab, refToPep.tab, refPepStatus.tab, exceptions.tab and accVer.tab.
 * The refSeq file is processed first, then the mrna file; size, cds,
 * exceptions and accVer output is written for both, the status and peptide
 * files only for refSeq records. */
{
struct lineFile *mrna = lineFileOpen(mrnaRa, TRUE);
struct lineFile *refSeq = lineFileOpen(refSeqRa, TRUE);
makeDir(outDir);
FILE *fCds = openToWrite(outDir, "cds.tab");
FILE *fStatus = openToWrite(outDir, "refSeqStatus.tab");
FILE *fSize = openToWrite(outDir, "mrnaSize.tab");
FILE *fRefToPep = openToWrite(outDir, "refToPep.tab");
FILE *fPepStatus = openToWrite(outDir, "refPepStatus.tab");
FILE *fExceptions = openToWrite(outDir, "exceptions.tab");
FILE *fAccVer = openToWrite(outDir, "accVer.tab");

struct hash *ra;
while ((ra = raNextRecord(refSeq)) != NULL)
    {
    /* All required fields are looked up against the refSeq file, which is
     * the file these records were read from. */
    char *acc = requiredField(ra, refSeq, "acc");
    char *rss = requiredField(ra, refSeq, "rss");
    char *siz = requiredField(ra, refSeq, "siz");
    char *ver = requiredField(ra, refSeq, "ver");
    char *prt = hashFindVal(ra, "prt");
    char *cds = hashFindVal(ra, "cds");

    /* Translate rss into status. */
    char *status = NULL;
    if (sameString(rss, "rev"))
        status = "Reviewed";
    else if (sameString(rss, "pro"))
        status = "Provisional";
    else if (sameString(rss, "pre"))
        status = "Predicted";
    else if (sameString(rss, "val"))
        status = "Validated";
    else if (sameString(rss, "inf"))
        status = "Inferred";
    else
        errAbort("Unrecognized rss field %s after line %d of %s", rss, 
            refSeq->lineIx, refSeq->fileName);

    fprintf(fStatus, "%s.%s\t%s\n", acc, ver, status);
    if (prt != NULL)
        {
        fprintf(fPepStatus, "%s\t%s\n", prt, status);
        fprintf(fRefToPep, "%s.%s\t%s\n", acc, ver, prt);
        }
    fprintf(fSize, "%s.%s\t%s\n", acc, ver, siz);
    if (cds != NULL)
        fprintf(fCds, "%s.%s\t%s\n", acc, ver, cds);
    outputExceptions(acc, ver, ra, fExceptions);
    fprintf(fAccVer, "%s\t%s.%s\n", acc, acc, ver);
    hashFree(&ra);
    }

while ((ra = raNextRecord(mrna)) != NULL)
    {
    char *acc = requiredField(ra, mrna, "acc");
    char *siz = requiredField(ra, mrna, "siz");
    char *ver = requiredField(ra, mrna, "ver");
    char *cds = hashFindVal(ra, "cds");
    fprintf(fSize, "%s.%s\t%s\n", acc, ver, siz);
    if (cds != NULL)
        fprintf(fCds, "%s.%s\t%s\n", acc, ver, cds);
    outputExceptions(acc, ver, ra, fExceptions);
    fprintf(fAccVer, "%s\t%s.%s\n", acc, acc, ver);
    hashFree(&ra);
    }

/* Close both inputs as well as the outputs. */
lineFileClose(&refSeq);
lineFileClose(&mrna);
carefulClose(&fCds);
carefulClose(&fStatus);
carefulClose(&fSize);
carefulClose(&fRefToPep);
carefulClose(&fPepStatus);
carefulClose(&fExceptions);
carefulClose(&fAccVer);
}