コード例 #1
0
static void parseWarnings()
/* Flag clone warning cases: add a "wrn" entry to kvt when one of the entry
 * flags is set.  The Athersys RAGE flag is tested first, so at most one
 * warning is added per call. */
{
char *warning = NULL;

if (isAthersysRageEntry)
    warning = "athRage";
else if (isOrestesEntry)
    warning = "orestes";

if (warning != NULL)
    kvtAdd(kvt, "wrn", warning);
}
コード例 #2
0
static void addMiscDiff(int iDiff, char *subField, char *val)
/* Add val to kvt under the key "mdiff.<iDiff><subField>".  subField may be
 * an empty string.  The key is formatted in a local buffer and a copy made
 * with lmCloneString(kvtMem, ...) is what gets handed to kvtAdd. */
{
char keyBuf[256];

safef(keyBuf, sizeof(keyBuf), "mdiff.%d%s", iDiff, subField);
kvtAdd(kvt,
       lmCloneString(kvtMem, keyBuf),
       val);
}
コード例 #3
0
static void parseSourceOrganism()
/* Split the source /organism value on ';' and keep every piece that is not
 * the same string as the entry's organism field.  The kept pieces are joined
 * with ';' in srcOrgBuf and, if any remain, added to kvt as srcOrg. */
{
char **names;
int nameCount, ix;

/* nothing to do when there is no source organism text */
if (gbSourceOrganism->val->stringSize == 0)
    return;

/* buffer is created on first use and reused on later calls */
if (srcOrgBuf == NULL)
    srcOrgBuf = dyStringNew(256);
dyStringClear(srcOrgBuf);

/* first call passes no output array; its return value sizes the array
 * that the second call fills in */
nameCount = chopString(gbSourceOrganism->val->string, ";", NULL, 0);
AllocArray(names, nameCount);
chopString(gbSourceOrganism->val->string, ";", names, nameCount);

for (ix = 0; ix < nameCount; ix++)
    {
    if (sameString(names[ix], gbOrganismField->val->string))
        continue;
    if (srcOrgBuf->stringSize > 0)
        dyStringAppendC(srcOrgBuf, ';');
    dyStringAppend(srcOrgBuf, names[ix]);
    }
freeMem(names);

if (srcOrgBuf->stringSize > 0)
    kvtAdd(kvt, "srcOrg", srcOrgBuf->string);
}
コード例 #4
0
static void updateKvt(struct keyVal **kvPtr, char* name, char *val)
/* Add or update a kvt value.  When *kvPtr is NULL a new entry called name is
 * added to kvt and *kvPtr is set to the result of kvtAdd; otherwise the val
 * pointer of the existing entry is replaced. */
{
if (*kvPtr == NULL)
    {
    *kvPtr = kvtAdd(kvt, name, val);
    }
else
    {
    (*kvPtr)->val = val;
    }
}
コード例 #5
0
static void writePepSeq()
/* Write the peptide sequence to gPepFa and record its size ("prs"), fasta
 * record offset ("pfo") and fasta record size ("pfs") in kvt.  Does nothing
 * unless gPepFa is open and both the protein id and translation fields are
 * non-empty. */
{
int recBytes;
int havePeptide = (gPepFa != NULL)
    && (gbProteinIdField->val->stringSize > 0)
    && (gbTranslationField->val->stringSize > 0);

if (!havePeptide)
    return;

gbFaWriteSeq(gPepFa, gbProteinIdField->val->string, NULL,
             gbTranslationField->val->string, -1);

/* peptide length */
safef(pepSizeStr, sizeof(pepSizeStr), "%u",
      gbTranslationField->val->stringSize);
kvtAdd(kvt, "prs", pepSizeStr);

/* offset of the record just written */
safef(pepFaOffStr, sizeof(pepFaOffStr), "%lld", (long long)gPepFa->recOff);
kvtAdd(kvt, "pfo", pepFaOffStr);

/* size of the record: current offset minus record start */
recBytes = gPepFa->off - gPepFa->recOff;
safef(pepFaSizeStr, sizeof(pepFaSizeStr), "%d", recBytes);
kvtAdd(kvt, "pfs", pepFaSizeStr);
}
コード例 #6
0
static char *findSyntheticTarget()
/* for a synthetic sequence, attempt to find the targeted organism.  This was
 * added to support the MGC/ORFeome clones.  In general, there is no defined way to
 * determine an organism that a synthenic clone targets. */
{
struct keyVal *kv;
if (synOrgBuf == NULL)
    synOrgBuf = dyStringNew(256);
dyStringClear(synOrgBuf);

kv = kvtGet(kvt, "srcOrg");
if (kv != NULL)
    dyStringAppend(synOrgBuf, kv->val);

if (synOrgBuf->stringSize > 0)
    {
    kvtAdd(kvt, "synOrg", synOrgBuf->string);
    return synOrgBuf->string;
    }
else
    return NULL;
}
コード例 #7
0
ファイル: keys.c プロジェクト: JinfengChen/pblat
void kvtParseAdd(struct kvt *kvt, char *text)
/* Add in keys from text.  Text is in format:
 *     key val
 * for each line of text.  Lines are separated by any run of '\n' and '\r'
 * characters, so blank lines are skipped.  A line is split at its first
 * space; lines that contain no space are ignored.
 *
 * Text is modified in place: the first line-separator character after each
 * line and the first space of each key/val line are replaced by 0's.  The
 * key and val pointers handed to kvtAdd point into text, so text should
 * persist until call to keysClear().
 *
 * There is no limit on the number of lines.  A NULL text is ignored. */
{
    static char lineSeps[] = "\n\r";
    char *pos = text;
    char *k, *v;

    if (text == NULL)
        return;

    for (;;)
    {
        /* Skip separators (and with them, empty lines). */
        pos += strspn(pos, lineSeps);
        if (*pos == 0)
            break;

        /* Terminate the line before looking for the space, so the
         * search for ' ' cannot run on into the following line. */
        k = pos;
        pos += strcspn(pos, lineSeps);
        if (*pos != 0)
            *pos++ = 0;

        if ((v = strchr(k, ' ')) != NULL)
        {
            *v++ = 0;
            kvtAdd(kvt, k, v);
        }
    }
}
コード例 #8
0
static void procGbEntry(struct lineFile *lf, struct hash *estAuthorHash)
/* Process one entry in the genbank file.  readGbInfo should be called
 * first.
 *
 *   lf            - file being read; the entry's sequence is either read from
 *                   it (gbfReadSequence) or skipped (gbfSkipSequence).
 *   estAuthorHash - keyed by author string, values are struct authorExample;
 *                   for ESTs a new example is added or its count incremented.
 *
 * The accession is truncated to its first word, the version and GI are taken
 * from the VERSION field, and mol/cat/dat are taken from the LOCUS line.
 * errAborts if LOCUS or ACCESSION is missing or the LOCUS line has an
 * unexpected shape.  If keepGbEntry() accepts the entry and a sequence is
 * read, the sequence goes to faFile (when open), the peptide to gPepFa (when
 * open), the kvt to raFile and an index record to gbIdxFile (when open). */
{
char *words[16];
char date[64];
int wordCount;
DNA *dna = NULL;
int dnaSize;
char sizeString[16];
char accVer[64];
int faSize;
char *locus = gbLocusField->val->string;
char *accession = gbAccessionField->val->string;
int version = 0;
char *gi = NULL;
char *verChar = gbVersionField->val->string;
char *s;
char *org = gbOrganismField->val->string;
char *synOrg = NULL;
struct keyVal *seqKey, *commentKey;
boolean isEst = FALSE;
char verNum[8];
char *com = gbCommentField->val->string;

if (locus == NULL || accession == NULL)
    errAbort("No LOCUS or no ACCESSION line near %d of %s",
             lf->lineIx, lf->fileName);
lmCleanup(&kvtMem);

/* Chop off all but first word of accession. */
s = skipLeadingSpaces(accession);
if (s != NULL)
    s = skipLeadingNonSpaces(s);
if (s != NULL)
    *s = 0;

/* Get version field (defaults to zero) */
if (verChar != NULL)
    {
    char *parts[2];
    int partCount;

    partCount = chopByWhite(verChar, parts, ArraySize(parts));

    /* Version is number after dot.  parts[0] is only filled in when at
     * least one word was found, so an empty field leaves version at 0. */
    if (partCount >= 1)
        {
        char *dot = strchr(parts[0], '.');
        if (dot != NULL)
            version = atoi(dot+1);
        }
    if (partCount >= 2 && startsWith("GI:", parts[1]))
        gi = parts[1]+3;
    }

gbfFlatten(kvt);

/* Get additional keys. */
if (com != NULL)
    {
    if (startsWith("REVIEWED", com))
        kvtAdd(kvt, "cur", "yes");
    }
safef(verNum, sizeof(verNum), "%d", version);
kvtAdd(kvt, "ver", verNum);
if (gi != NULL)
    kvtAdd(kvt, "ngi", gi);
wordCount = chopLine(locus, words);
if (wordCount >= 6)
    {
    kvtAdd(kvt, "mol", words[3]);
    kvtAdd(kvt, "cat", words[wordCount-2]);
    ncbiDateToSqlDate(words[wordCount-1], date, sizeof(date), accession);
    kvtAdd(kvt, "dat", date);
    }
else if (wordCount == 5 && sameString(words[2], "bp")
         && isdigit((unsigned char)words[1][0]))
    {
    /* Check carefully.  Probably it's just missing the molecule type...
     * Note that no "mol" key is added in this case. */
    if (!isNcbiDate(words[4]))
        {
        errAbort("Strange LOCUS line in %s accession %s",
                 lf->fileName, accession);
        }
    kvtAdd(kvt, "cat", words[3]);
    ncbiDateToSqlDate(words[4], date, sizeof(date), accession);
    kvtAdd(kvt, "dat", date);
    }
else
    {
    errAbort("Short LOCUS line in %s accession %s",
             lf->fileName, accession);
    }
if (((wordCount >= 5) && sameString(words[4], "EST")) || 
    ((wordCount >= 6) && sameString(words[5], "EST")))
    {
    /* Try and figure out if it's a 3' or 5' EST */
    char *dir = getEstDir(gbDefinitionField->val->string, com);
    if (dir != NULL)
        kvtAdd(kvt, "dir", dir);
    isEst = TRUE;
    }

/* Handle other fields */
parseDbXrefs();
parseGene();
parseSourceOrganism();
parseMiscDiffs();
parseWarnings();

if (startsWith("synthetic construct", gbOrganismField->val->string))
    {
    synOrg = findSyntheticTarget();
    if (synOrg != NULL)
        hackSynClone();
    }

if (keepGbEntry(isEst))
    {
    /* Handle sequence part of read. */
    dna = gbfReadSequence(lf, &dnaSize);
    }
/* just discard if no sequence */
if (dna != NULL)
    {
    seqKey = kvtAdd(kvt, "seq", dna);
    safef(sizeString, sizeof(sizeString), "%d", dnaSize);
    kvtAdd(kvt, "siz", sizeString);
    
    if (isEst)
        {
        char *author = gbAuthorsField->val->string;
        if (author != NULL)
            {
            struct authorExample *ae;
            struct hashEl *hel;
            if ((hel = hashLookup(estAuthorHash, author)) == NULL)
                {
                AllocVar(ae);
                hel = hashAdd(estAuthorHash, author, ae);
                ae->name = hel->name;
                ae->count = 1;
                /* strncpy does not terminate when the source fills the
                 * buffer, so copy one less and terminate explicitly */
                strncpy(ae->accession, accession, sizeof(ae->accession) - 1);
                ae->accession[sizeof(ae->accession) - 1] = 0;
                slAddHead(&estAuthorList, ae);
                }
            else
                {
                ae = hel->val;
                ae->count += 1;
                }
            }
        }
    seqKey->val = NULL; /* Don't write out sequence here. */
    commentKey = kvtGet(kvt, "com");
    if (commentKey != NULL)
        commentKey->val = NULL;  /* Don't write out comment either. */

    setupOutputFiles(accession, org);

    if (faFile != NULL)
        {
        /* save fasta offset, size in ra */
        safef(accVer, sizeof(accVer), "%s.%d", accession, version);
        gbFaWriteSeq(faFile, accVer, NULL, dna, -1);
        faSize = faFile->off - faFile->recOff;
        safef(faOffStr, sizeof(faOffStr), "%lld", (long long)faFile->recOff);
        kvtAdd(kvt, "fao", faOffStr);
        safef(faSizeStr, sizeof(faSizeStr), "%d", faSize);
        kvtAdd(kvt, "fas", faSizeStr);
        }
    if (gPepFa != NULL)
        {
        /* must write before writing kvt */
        writePepSeq();
        }
    kvtWriteAll(kvt, raFile, NULL);
    if (gbIdxFile != NULL)
        {
        /* "mol" is not added for five-word LOCUS lines, so the lookup may
         * come back NULL; fall back to mol_mRNA in that case as well as
         * when the value itself is NULL. */
        struct keyVal *molkv = kvtGet(kvt, "mol");
        enum molType molType = ((molkv != NULL) && (molkv->val != NULL))
            ? gbParseMolType(molkv->val) : mol_mRNA;
        /* use synthetic target if it was determined */
        gbProcessedWriteIdxRec(gbIdxFile, accession, version,
                               kvtLookup(kvt, "dat"),
                               ((synOrg != NULL) ? synOrg : org),
                               molType);
        }
    }
else
    gbfSkipSequence(lf);
}