Esempio n. 1
0
int main(int argc, char *argv[])
/* For each line of "finished.new": trim it, cut it at the first '.',
 * fetch the text stored under that name in "unfinished_acc.fof", parse
 * the text into a key/value table and write the values of the "pha" and
 * "frg" keys to "frags.txt".
 * Returns 0 on success, 1 if the output file could not be closed cleanly.
 * argc and argv are not used. */
{
    char *listName = "finished.new";
    char *fofName = "unfinished_acc.fof";
    char *outName = "frags.txt";
    FILE *out = mustOpen(outName, "w");
    struct fof *fof = fofOpen(fofName, NULL);
    struct kvt *kvt = newKvt(64);
    char line[512];
    FILE *lf;
    char *keyText;
    char *s, *t;
    int size;

    lf = mustOpen(listName, "r");
    while (fgets(line, sizeof(line), lf))
    {
        char *phase, *frags;

        kvtClear(kvt);
        s = trimSpaces(line);
        /* Keep only the part of the name before the first dot. */
        t = strchr(s, '.');
        if (t != NULL)
            *t = 0;
        keyText = fofFetchString(fof, s, &size);
        kvtParseAdd(kvt, keyText);
        /* A missing key must not reach "%s" as a NULL pointer. */
        phase = kvtLookup(kvt, "pha");
        frags = kvtLookup(kvt, "frg");
        fprintf(out, "%s phase %s frags %s\n", s,
                (phase != NULL) ? phase : "(null)",
                (frags != NULL) ? frags : "(null)");
        freez(&keyText);
    }
    fclose(lf);
    freeKvt(&kvt);
    /* Closing flushes buffered output; report a failed write. */
    if (fclose(out) != 0)
    {
        perror(outName);
        return 1;
    }
    return 0;
}
Esempio n. 2
0
static void getIntVals(struct kvt *kvt, struct exp *exp, int *retLeft, int *retRight)
/* Produce the two integer operands of a comparison expression.
 * The left operand is the value stored in kvt under the key named by
 * exp->left; the right operand is the literal text in exp->right.
 * Each is converted with atoi(), and a missing (NULL) string yields 0. */
{
    char *lhsKey = exp->left;
    char *lhsText = kvtLookup(kvt, lhsKey);
    char *rhsText = exp->right;

    *retLeft = (lhsText != NULL) ? atoi(lhsText) : 0;
    *retRight = (rhsText != NULL) ? atoi(rhsText) : 0;
}
Esempio n. 3
0
static boolean rkeyEval(struct kvt *kvt, struct exp *exp)
/* Evaluate an expression tree against the key/value table kvt.
 * A NULL expression evaluates to TRUE.  Leaf nodes compare the value
 * looked up under exp->left with the literal in exp->right; logical
 * nodes recurse into their operands.  Aborts on an unknown node type. */
{
    if (exp == NULL)
        return TRUE;

    switch (exp->type)
    {
    case kxMatch:
    {
        /* A missing value is compared as the word "null". */
        char *lookupKey = exp->left;
        char *pattern = exp->right;
        char *found = kvtLookup(kvt, lookupKey);
        return sameWord(pattern, (found != NULL) ? found : "null");
    }
    case kxWildMatch:
    {
        /* A missing value is matched only by the pattern "*". */
        char *lookupKey = exp->left;
        char *pattern = exp->right;
        char *found = kvtLookup(kvt, lookupKey);
        if (found != NULL)
            return wildMatch(pattern, found);
        return sameString(pattern, "*");
    }
    case kxGT:
    case kxGE:
    case kxLT:
    case kxLE:
    {
        /* All numeric comparisons share the same operand fetch. */
        int lhs, rhs;
        getIntVals(kvt, exp, &lhs, &rhs);
        if (exp->type == kxGT)
            return lhs > rhs;
        if (exp->type == kxGE)
            return lhs >= rhs;
        if (exp->type == kxLT)
            return lhs < rhs;
        return lhs <= rhs;
    }
    case kxNot:
        return !rkeyEval(kvt, exp->right);
    case kxAnd:
        return rkeyEval(kvt, exp->left) && rkeyEval(kvt, exp->right);
    case kxOr:
        return rkeyEval(kvt, exp->left) || rkeyEval(kvt, exp->right);
    case kxXor:
        return rkeyEval(kvt, exp->left) ^ rkeyEval(kvt, exp->right);
    default:
        errAbort("unknown expression type %d", exp->type);
        return 0;
    }
}
Esempio n. 4
0
/* Look up value for key on left hand side of expression and
 * literal string from right hand side.  Convert both to ints. */
{
    char *rightString = exp->right;
    char *leftKey = exp->left;
    char *leftString = kvtLookup(kvt, leftKey);

    if (leftString == NULL)
        *retLeft = 0;
    else
        *retLeft = atoi(leftString);
    if (rightString == NULL)
        *retRight = 0;
    else
        *retRight = atoi(rightString);
}

#if 0 /* unused */
static void dumpExp(struct kvt *kvt, struct exp *exp)
/* Print a one-line description of the top node of an expression to
 * stdout.  Match nodes show the key, the value currently stored under
 * that key in kvt, and the pattern; comparison nodes show both integer
 * operands; logical nodes show only their operator symbol (operands are
 * not descended into).  A NULL expression prints nothing. */
{
    if (exp == NULL)
        return;
    switch (exp->type)
    {
    case kxMatch:
    {
        char *key = exp->left;
        char *matcher = exp->right;
        char *val = kvtLookup(kvt, key);
        /* The key may be absent; never hand NULL to "%s". */
        printf("%s(%s) match %s\n", key, (val != NULL) ? val : "(null)", matcher);
        break;
    }
    case kxWildMatch:
    {
        char *key = exp->left;
        char *matcher = exp->right;
        char *val = kvtLookup(kvt, key);
        printf("%s(%s) wildMatch %s\n", key, (val != NULL) ? val : "(null)", matcher);
        break;
    }
    case kxGT:
    {
        int left, right;
        getIntVals(kvt, exp, &left, &right);
        printf("%d > %d\n", left, right);
        break;
    }
    case kxGE:
    {
        int left, right;
        getIntVals(kvt, exp, &left, &right);
        printf("%d >= %d\n", left, right);
        break;
    }
    case kxLT:
    {
        int left, right;
        getIntVals(kvt, exp, &left, &right);
        printf("%d < %d\n", left, right);
        break;
    }
    case kxLE:
    {
        int left, right;
        getIntVals(kvt, exp, &left, &right);
        printf("%d <= %d\n", left, right);
        break;
    }

    case kxNot:
    {
        printf("!\n");
        break;
    }
    case kxAnd:
    {
        printf("&\n");
        break;
    }
    case kxOr:
    {
        printf("|\n");
        break;
    }
    case kxXor:
    {
        printf("^\n");
        break;
    }
    default:
    {
        printf("unknown expression type %d\n", exp->type);
        break;
    }
    }
}
Esempio n. 5
0
static void procGbEntry(struct lineFile *lf, struct hash *estAuthorHash)
/* Process one entry in the genbank file.  readGbInfo should be called
 * first.
 *
 * Reads the already-parsed LOCUS, ACCESSION, VERSION, ORGANISM and COMMENT
 * fields, adds derived keys (ver, ngi, mol, cat, dat, dir, cur, siz, fao,
 * fas) to the global kvt, and, if the entry is kept and has sequence,
 * writes the fasta record, the ra record and the index record.  Entries
 * without sequence are skipped.  For ESTs the author line is tallied in
 * estAuthorHash / estAuthorList.  Aborts on a missing or malformed LOCUS
 * or ACCESSION line. */
{
char *words[16];
char date[64];
int wordCount;
DNA *dna = NULL;
int dnaSize;
char sizeString[16];
char accVer[64];
int faSize;
char *locus = gbLocusField->val->string;
char *accession = gbAccessionField->val->string;
int version = 0;
char *gi = NULL;
char *verChar = gbVersionField->val->string;
char *s;
char *org = gbOrganismField->val->string;
char *synOrg = NULL;
struct keyVal *seqKey, *commentKey;
boolean isEst = FALSE;
char verNum[8];
char *com = gbCommentField->val->string;

if (locus == NULL || accession == NULL)
    errAbort("No LOCUS or no ACCESSION line near %d of %s",
             lf->lineIx, lf->fileName);
lmCleanup(&kvtMem);

/* Chop off all but first word of accession. */
s = skipLeadingSpaces(accession);
if (s != NULL)
    s = skipLeadingNonSpaces(s);
if (s != NULL)
    *s = 0;

/* Get version field (defaults to zero) */
if (verChar != NULL)
    {
    char *parts[2];
    int partCount;

    partCount = chopByWhite(verChar, parts, ArraySize(parts));

    /* Version is number after dot.  parts[] is only filled in up to
     * partCount, so an all-blank VERSION line must not be inspected. */
    if (partCount >= 1)
        {
        char *dot = strchr(parts[0], '.');
        if (dot != NULL)
            version = atoi(dot+1);
        }
    if (partCount >= 2 && startsWith("GI:", parts[1]))
        gi = parts[1]+3;
    }

gbfFlatten(kvt);
                
/* Get additional keys. */
if (com != NULL)
    {
    if (startsWith("REVIEWED", com))
        kvtAdd(kvt, "cur", "yes");
    }
safef(verNum, sizeof(verNum), "%d", version);
kvtAdd(kvt, "ver", verNum);
if (gi != NULL)
    kvtAdd(kvt, "ngi", gi);
wordCount = chopLine(locus, words);
if (wordCount >= 6)
    {
    kvtAdd(kvt, "mol", words[3]);
    kvtAdd(kvt, "cat", words[wordCount-2]);
    ncbiDateToSqlDate(words[wordCount-1], date, sizeof(date), accession);
    kvtAdd(kvt, "dat", date);
    }
else if (wordCount == 5 && sameString(words[2], "bp")
         && isdigit((unsigned char)words[1][0]))
    {
    /* Check carefully.  Probably it's just missing the molecule type... */
    if (!isNcbiDate(words[4]))
        {
        errAbort("Strange LOCUS line in %s accession %s",
                 lf->fileName, accession);
        }
    /* Note: no "mol" key is added on this path. */
    kvtAdd(kvt, "cat", words[3]);
    ncbiDateToSqlDate(words[4], date, sizeof(date), accession);
    kvtAdd(kvt, "dat", date);
    }
else
    {
    errAbort("Short LOCUS line in %s accession %s",
             lf->fileName, accession);
    }
if (((wordCount >= 5) && sameString(words[4], "EST")) || 
    ((wordCount >= 6) && sameString(words[5], "EST")))
    {
    /* Try and figure out if it's a 3' or 5' EST */
    char *dir = getEstDir(gbDefinitionField->val->string, com);
    if (dir != NULL)
        kvtAdd(kvt, "dir", dir);
    isEst = TRUE;
    }

/* Handle other fields */
parseDbXrefs();
parseGene();
parseSourceOrganism();
parseMiscDiffs();
parseWarnings();

if (startsWith("synthetic construct", gbOrganismField->val->string))
    {
    synOrg = findSyntheticTarget();
    if (synOrg != NULL)
        hackSynClone();
    }

if (keepGbEntry(isEst))
    {
    /* Handle sequence part of read. */
    dna = gbfReadSequence(lf, &dnaSize);
    }
/* just discard if no sequence */
if (dna != NULL)
    {
    seqKey = kvtAdd(kvt, "seq", dna);
    safef(sizeString, sizeof(sizeString), "%d", dnaSize);
    kvtAdd(kvt, "siz", sizeString);
    
    if (isEst)
        {
        char *author = gbAuthorsField->val->string;
        if (author != NULL)
            {
            struct authorExample *ae;
            struct hashEl *hel;
            if ((hel = hashLookup(estAuthorHash, author)) == NULL)
                {
                /* First EST seen for this author line: remember the
                 * accession as an example. */
                AllocVar(ae);
                hel = hashAdd(estAuthorHash, author, ae);
                ae->name = hel->name;
                ae->count = 1;
                /* strncpy does not terminate on truncation; do it here. */
                strncpy(ae->accession, accession, sizeof(ae->accession) - 1);
                ae->accession[sizeof(ae->accession) - 1] = 0;
                slAddHead(&estAuthorList, ae);
                }
            else
                {
                ae = hel->val;
                ae->count += 1;
                }
            }
        }
    seqKey->val = NULL; /* Don't write out sequence here. */
    commentKey = kvtGet(kvt, "com");
    if (commentKey != NULL)
        commentKey->val = NULL;  /* Don't write out comment either. */

    setupOutputFiles(accession, org);

    if (faFile != NULL)
        {
        /* save fasta offset, size in ra */
        safef(accVer, sizeof(accVer), "%s.%d", accession, version);
        gbFaWriteSeq(faFile, accVer, NULL, dna, -1);
        faSize = faFile->off - faFile->recOff;
        safef(faOffStr, sizeof(faOffStr), "%lld", (long long)faFile->recOff);
        kvtAdd(kvt, "fao", faOffStr);
        safef(faSizeStr, sizeof(faSizeStr), "%d", faSize);
        kvtAdd(kvt, "fas", faSizeStr);
        }
    if (gPepFa != NULL)
        {
        /* must write before writing kvt */
        writePepSeq();
        }
    kvtWriteAll(kvt, raFile, NULL);
    if (gbIdxFile != NULL)
        {
        /* "mol" is absent for LOCUS lines without a molecule type, so the
         * key itself may be missing; fall back to mRNA in that case. */
        struct keyVal *molkv = kvtGet(kvt, "mol");
        enum molType molType = (molkv != NULL && molkv->val != NULL)
                               ? gbParseMolType(molkv->val) : mol_mRNA;
        /* use synthetic target if it was determined */
        gbProcessedWriteIdxRec(gbIdxFile, accession, version,
                               kvtLookup(kvt, "dat"),
                               ((synOrg != NULL) ? synOrg : org),
                               molType);
        }
    }
else
    gbfSkipSequence(lf);
}
Esempio n. 6
0
void scanFile(struct keyExp *exp, char *fileName)
/* Scan file for things that match expression.
 *
 * The file is read as records separated by a blank line (two consecutive
 * newlines).  Each non-empty record is parsed into the global kvt and,
 * if exp is NULL or evaluates true, counted in matchCount.  Depending on
 * the global command, the value of selectKey is then printed to out
 * (ctPrint) or tallied in statHash / useCounts (ctStats, ctHist); a
 * missing value is treated as the string "NULL".  A progress dot is
 * printed every modMax records.  Aborts if a record does not fit in the
 * 32k buffer. */
{
FILE *f = mustOpen(fileName, "r");
int keyBufSize = 32*1024;
char *keysBuf = needMem(keyBufSize+1);
int lastc, c;
int kbIx;
int modMax = 10000;
int mod = modMax;

printf("scanning %s", fileName);
fflush(stdout);
for (;;)
    {
    if (--mod <= 0)
        {
        printf(".");
        fflush(stdout);
        mod = modMax;
        }
    kvtClear(kvt);
    kbIx = 0;
    lastc = 0;
    /* Collect one record: stop at end of file or at a blank line. */
    for (;;)
        {
        if (((c = fgetc(f)) == EOF) || (c == '\n' && lastc == '\n'))
            break;         
        keysBuf[kbIx] = c;
        if (++kbIx >= keyBufSize)
            errAbort("Record too long\n");
        lastc = c;
        }
    if (kbIx > 0)
        {
        /* Terminate the record so bytes left over from a longer, earlier
         * record are not parsed as part of this one.  kbIx < keyBufSize
         * here and the buffer holds keyBufSize+1 bytes. */
        keysBuf[kbIx] = 0;
        kvtParseAdd(kvt, keysBuf);
        if (exp == NULL || keyExpEval(exp, kvt))
            {
            ++matchCount;
            if (command == ctPrint)
                {
                char *kv = kvtLookup(kvt, selectKey);
                if (kv == NULL)
                    kv = "NULL";
                fprintf(out, "%s\n", kv);
                }
            if (command == ctStats || command == ctHist)
                {
                char *kv = kvtLookup(kvt, selectKey);
                struct useCount *u;
                struct hashEl *hel;
                if (kv == NULL)
                    kv = "NULL";
                if ((hel = hashLookup(statHash, kv)) == NULL)
                    {
                    /* First occurrence of this value: start a counter. */
                    AllocVar(u);
                    hel = hashAdd(statHash, kv, u);
                    u->what = hel->name;
                    u->count = 1;
                    slAddHead(&useCounts, u);
                    }
                else
                    {
                    u = hel->val;
                    ++u->count;
                    }
                }
            }
        }
    if (c == EOF)
        break;
    }
freeMem(keysBuf);
fclose(f);
printf("\n");
}