int main(int argc, char *argv[])
/* For each line of finished.new, strip surrounding white space and anything
 * from the first '.' onward, fetch the record stored under that name in
 * unfinished_acc.fof, and write "<name> phase <pha> frags <frg>" to
 * frags.txt.  A key missing from the record is written as "(null)".
 * Returns 0 on success, 1 if the output file could not be closed cleanly.
 * The command line arguments are not used. */
{
    char *listName = "finished.new";
    char *fofName = "unfinished_acc.fof";
    char *outName = "frags.txt";
    FILE *out = mustOpen(outName, "w");
    struct fof *fof = fofOpen(fofName, NULL);
    struct kvt *kvt = newKvt(64);
    char line[512];
    FILE *lf;
    char *keyText;
    char *s, *t;
    int size;

    lf = mustOpen(listName, "r");
    while (fgets(line, sizeof(line), lf)) {
        char *phase, *frags;

        kvtClear(kvt);
        s = trimSpaces(line);
        /* Only the part of the name before the first '.' is used as the key. */
        t = strchr(s, '.');
        if (t != NULL)
            *t = 0;
        keyText = fofFetchString(fof, s, &size);
        kvtParseAdd(kvt, keyText);

        /* Passing a null pointer for %s is undefined, so substitute text. */
        phase = kvtLookup(kvt, "pha");
        frags = kvtLookup(kvt, "frg");
        fprintf(out, "%s phase %s frags %s\n", s,
                (phase != NULL) ? phase : "(null)",
                (frags != NULL) ? frags : "(null)");
        freez(&keyText);
    }
    freeKvt(&kvt);
    fclose(lf);
    /* Closing flushes buffered output; a failure here means lost data. */
    if (fclose(out) != 0) {
        perror(outName);
        return 1;
    }
    return 0;
}
static void getIntVals(struct kvt *kvt, struct exp *exp, int *retLeft, int *retRight)
/* Fetch the two integer operands of a comparison expression.  The left
 * operand is the value stored in kvt under the key named by exp->left; the
 * right operand is the literal text in exp->right.  Each is converted with
 * atoi(), and a missing (NULL) string yields 0. */
{
    char *literal = exp->right;
    char *key = exp->left;
    char *stored = kvtLookup(kvt, key);

    *retLeft = (stored != NULL) ? atoi(stored) : 0;
    *retRight = (literal != NULL) ? atoi(literal) : 0;
}
static boolean rkeyEval(struct kvt *kvt, struct exp *exp) /* Recursively evaluate expression. */ { if (exp == NULL) return TRUE; switch (exp->type) { case kxMatch: { char *key = exp->left; char *matcher = exp->right; char *val = kvtLookup(kvt, key); if (val == NULL) return sameWord(matcher, "null"); else return sameWord(matcher, val); } case kxWildMatch: { char *key = exp->left; char *matcher = exp->right; char *val = kvtLookup(kvt, key); if (val == NULL) return sameString(matcher, "*"); else return wildMatch(matcher, val); } case kxGT: { int left, right; getIntVals(kvt, exp, &left, &right); return left > right; } case kxGE: { int left, right; getIntVals(kvt, exp, &left, &right); return left >= right; } case kxLT: { int left, right; getIntVals(kvt, exp, &left, &right); return left < right; } case kxLE: { int left, right; getIntVals(kvt, exp, &left, &right); return left <= right; } case kxNot: { return !rkeyEval(kvt, exp->right); } case kxAnd: { return rkeyEval(kvt, exp->left) && rkeyEval(kvt, exp->right); } case kxOr: { return rkeyEval(kvt, exp->left) || rkeyEval(kvt, exp->right); } case kxXor: { return rkeyEval(kvt, exp->left) ^ rkeyEval(kvt, exp->right); } default: { errAbort("unknown expression type %d", exp->type); return 0; } } }
/* Look up the value for the key on the left hand side of the expression and the literal string from the right hand side. Convert both to ints with atoi(); a NULL string yields 0. */ { char *rightString = exp->right; char *leftKey = exp->left; char *leftString = kvtLookup(kvt, leftKey); if (leftString == NULL) *retLeft = 0; else *retLeft = atoi(leftString); if (rightString == NULL) *retRight = 0; else *retRight = atoi(rightString); } #if 0 /* unused */ static void dumpExp(struct kvt *kvt, struct exp *exp) /* Debug helper: print a one-line description of the top node of exp to stdout. Match and wildMatch nodes show the key, its value looked up in kvt, and the pattern; comparison nodes show both integer operands; not/and/or/xor nodes print only the operator symbol. Sub-expressions are not visited. */ { switch (exp->type) { case kxMatch: { char *key = exp->left; char *matcher = exp->right; char *val = kvtLookup(kvt, key); printf("%s(%s) match %s\n", key, val, matcher); break; } case kxWildMatch: { char *key = exp->left; char *matcher = exp->right; char *val = kvtLookup(kvt, key); printf("%s(%s) wildMatch %s\n", key, val, matcher); break; } case kxGT: { int left, right; getIntVals(kvt, exp, &left, &right); printf("%d > %d\n", left, right); break; } case kxGE: { int left, right; getIntVals(kvt, exp, &left, &right); printf("%d >= %d\n", left, right); break; } case kxLT: { int left, right; getIntVals(kvt, exp, &left, &right); printf("%d < %d\n", left, right); break; } case kxLE: { int left, right; getIntVals(kvt, exp, &left, &right); printf("%d <= %d\n", left, right); break; } case kxNot: { printf("!\n"); break; } case kxAnd: { printf("&\n"); break; } case kxOr: { printf("|\n"); break; } case kxXor: { printf("^\n"); break; } } }
static void procGbEntry(struct lineFile *lf, struct hash *estAuthorHash) /* process one entry in the genbank file . readGbInfo should be called * first */ { char *words[16]; char date[64]; int wordCount; DNA *dna = NULL; int dnaSize; char sizeString[16]; char accVer[64]; int faSize; char *locus = gbLocusField->val->string; char *accession = gbAccessionField->val->string; int version = 0; char *gi = NULL; char *verChar = gbVersionField->val->string; char *s; char *org = gbOrganismField->val->string; char *synOrg = NULL; struct keyVal *seqKey, *sizeKey, *commentKey; boolean isEst = FALSE; char verNum[8]; char *com = gbCommentField->val->string; if (locus == NULL || accession == NULL) errAbort("No LOCUS or no ACCESSION line near %d of %s", lf->lineIx, lf->fileName); lmCleanup(&kvtMem); /* Chop off all but first word of accession. */ s = skipLeadingSpaces(accession); if (s != NULL) s = skipLeadingNonSpaces(s); if (s != NULL) *s = 0; /* Get version field (defaults to zero) */ if (verChar != NULL) { char *parts[2]; char *accVer; int partCount; partCount = chopByWhite(verChar, parts, ArraySize(parts)); /* Version is number after dot. */ accVer = parts[0]; if ((accVer = strchr(accVer, '.')) != NULL) version = atoi(accVer+1); if (partCount >= 2 && startsWith("GI:", parts[1])) gi = parts[1]+3; } gbfFlatten(kvt); /* Get additional keys. */ if (com != NULL) { if (startsWith("REVIEWED", com)) kvtAdd(kvt, "cur", "yes"); } safef(verNum, sizeof(verNum), "%d", version); kvtAdd(kvt, "ver", verNum); if (gi != NULL) kvtAdd(kvt, "ngi", gi); wordCount = chopLine(locus, words); if (wordCount >= 6) { kvtAdd(kvt, "mol", words[3]); kvtAdd(kvt, "cat", words[wordCount-2]); ncbiDateToSqlDate(words[wordCount-1], date, sizeof(date), accession); kvtAdd(kvt, "dat", date); } else if (wordCount == 5 && sameString(words[2], "bp") && isdigit(words[1][0])) { /* Check carefully. Probably it's just missing the molecule type... 
*/ if (!isNcbiDate(words[4])) { errAbort("Strange LOCUS line in %s accession %s", lf->fileName, accession); } kvtAdd(kvt, "cat", words[3]); ncbiDateToSqlDate(words[4], date, sizeof(date), accession); kvtAdd(kvt, "dat", date); } else if (wordCount == 5 && sameString(words[2], "bp") && isdigit(words[1][0])) { kvtAdd(kvt, "mol", words[3]); } else { errAbort("Short LOCUS line in %s accession %s", lf->fileName, accession); } if (((wordCount >= 5) && sameString(words[4], "EST")) || ((wordCount >= 6) && sameString(words[5], "EST"))) { /* Try and figure out if it's a 3' or 5' EST */ char *dir = getEstDir(gbDefinitionField->val->string, com); if (dir != NULL) kvtAdd(kvt, "dir", dir); isEst = TRUE; } /* Handle other fields */ parseDbXrefs(); parseGene(); parseSourceOrganism(); parseMiscDiffs(); parseWarnings(); if (startsWith("synthetic construct", gbOrganismField->val->string)) { synOrg = findSyntheticTarget(); if (synOrg != NULL) hackSynClone(); } if (keepGbEntry(isEst)) { /* Handle sequence part of read. */ dna = gbfReadSequence(lf, &dnaSize); } /* just discard if no sequence */ if (dna != NULL) { seqKey = kvtAdd(kvt, "seq", dna); safef(sizeString, sizeof(sizeString), "%d", dnaSize); sizeKey = kvtAdd(kvt, "siz", sizeString); if (isEst) { char *author = gbAuthorsField->val->string; if (author != NULL) { struct authorExample *ae; struct hashEl *hel; if ((hel = hashLookup(estAuthorHash, author)) == NULL) { AllocVar(ae); hel = hashAdd(estAuthorHash, author, ae); ae->name = hel->name; ae->count = 1; strncpy(ae->accession, accession, sizeof(ae->accession)); slAddHead(&estAuthorList, ae); } else { ae = hel->val; ae->count += 1; } } } seqKey->val = NULL; /* Don't write out sequence here. */ commentKey = kvtGet(kvt, "com"); if (commentKey != NULL) commentKey->val = NULL; /* Don't write out comment either. 
*/ setupOutputFiles(accession, org); if (faFile != NULL) { /* save fasta offset, size in ra */ safef(accVer, sizeof(accVer), "%s.%d", accession, version); gbFaWriteSeq(faFile, accVer, NULL, dna, -1); faSize = faFile->off - faFile->recOff; safef(faOffStr, sizeof(faOffStr), "%lld", (long long)faFile->recOff); kvtAdd(kvt, "fao", faOffStr); safef(faSizeStr, sizeof(faSizeStr), "%d", faSize); kvtAdd(kvt, "fas", faSizeStr); } if (gPepFa != NULL) { /* must write before writing kvt */ writePepSeq(); } kvtWriteAll(kvt, raFile, NULL); if (gbIdxFile != NULL) { /* use synthetic target if it was determined */ struct keyVal *molkv = kvtGet(kvt, "mol"); enum molType molType = (molkv->val != NULL) ? gbParseMolType(molkv->val) : mol_mRNA; gbProcessedWriteIdxRec(gbIdxFile, accession, version, kvtLookup(kvt, "dat"), ((synOrg != NULL) ? synOrg : org), molType); } } else gbfSkipSequence(lf); }
void scanFile(struct keyExp *exp, char *fileName) /* Scan file for things that match expression. */ { FILE *f = mustOpen(fileName, "r"); int keyBufSize = 32*1024; char *keysBuf = needMem(keyBufSize+1); int lastc, c; int kbIx; int modMax = 10000; int mod = modMax; printf("scanning %s", fileName); fflush(stdout); for (;;) { if (--mod <= 0) { printf("."); fflush(stdout); mod = modMax; } kvtClear(kvt); kbIx = 0; lastc = 0; for (;;) { if (((c = fgetc(f)) == EOF) || (c == '\n' && lastc == '\n')) break; keysBuf[kbIx] = c; if (++kbIx >= keyBufSize) errAbort("Record too long\n"); lastc = c; } if (kbIx > 0) { kvtParseAdd(kvt, keysBuf); if (exp == NULL || keyExpEval(exp, kvt)) { ++matchCount; if (command == ctPrint) { char *kv = kvtLookup(kvt, selectKey); if (kv == NULL) kv = "NULL"; fprintf(out, "%s\n", kv); } if (command == ctStats || command == ctHist) { char *kv = kvtLookup(kvt, selectKey); struct useCount *u; struct hashEl *hel; if (kv == NULL) kv = "NULL"; if ((hel = hashLookup(statHash, kv)) == NULL) { AllocVar(u); hel = hashAdd(statHash, kv, u); u->what = hel->name; u->count = 1; slAddHead(&useCounts, u); } else { u = hel->val; ++u->count; } } } } if (c == EOF) break; } freeMem(keysBuf); fclose(f); printf("\n"); }