static void getFastaOffsets(struct brokenRefPepTbl *brpTbl, struct sqlConnection *conn, struct extFileTbl* extFileTbl, char *faPath) /* parse fasta file to get offsets of proteins */ { struct gbFa *fa = gbFaOpen(faPath, "r"); char acc[GB_ACC_BUFSZ]; struct brokenRefPep *brp; HGID extId = extFileTblGet(extFileTbl, conn, faPath); gbVerbMsg(5, "scanning fasta: %s", faPath); while (gbFaReadNext(fa)) { gbVerbMsg(5, " %s: %lld", fa->id, (long long)fa->recOff); /* save only if same acecss, version, and file (to match mrna fa) */ short ver = gbSplitAccVer(fa->id, acc); brp = hashFindVal(brpTbl->protAccHash, acc); if ((brp != NULL) && (ver == brp->protVer) && sameString(faPath, brp->newFaPath)) { gbFaGetSeq(fa); /* force read of sequence data */ brp->newFaId = extId; brp->newFaOff = fa->recOff; brp->newSeqSize = fa->seqLen; brp->newRecSize = fa->off-fa->recOff; gbVerbMsg(5, " save: %s %lld for %lld\n", fa->id, (long long)fa->recOff, (long long)fa->off); } } gbFaClose(&fa); }
void seqDataProcessUpdate(struct gbSelect* select)
/* Get sequences for a partition and update: every record of the partition's
 * processed "fa" file is read in order and handed to processSeq().
 * The partition's processed index should already be loaded and the selected
 * versions flagged before this is called. */
{
    char faPath[PATH_LEN];
    gbProcessedGetPath(select, "fa", faPath);

    struct gbFa *reader = gbFaOpen(faPath, "r");
    while (gbFaReadNext(reader)) {
        processSeq(select, reader);
    }
    gbFaClose(&reader);
}
boolean copyFastaRec(struct gbSelect* select, struct gbFa* inFa, struct outFa* nativeFa, struct outFa* xenoFa) /* Read and copy a record to one of the output files, if selected */ { char acc[GB_ACC_BUFSZ]; unsigned version; struct gbEntry* entry; if (!gbFaReadNext(inFa)) return FALSE; /* EOF */ version = gbSplitAccVer(inFa->id, acc); entry = gbReleaseFindEntry(select->release, acc); if (entry != NULL) { char* seq = gbFaGetSeq(inFa); if (strlen(seq) < MIN_SEQ_SIZE) { if (gbVerbose >= 3) gbVerbPr(3, "skip %s, less than minimum sequence size", inFa->id); } else if ((version == entry->selectVer) && (entry->clientFlags & ALIGN_FLAG)) { outFaWrite(((entry->orgCat == GB_NATIVE) ? nativeFa : xenoFa), inFa); if (gbVerbose >= 3) gbVerbPr(3, "aligning %s %s", inFa->id, gbOrgCatName(entry->orgCat)); } else if ((version == entry->selectVer) && (entry->clientFlags & MIGRATE_FLAG)) { if (gbVerbose >= 3) gbVerbPr(3, "migrating %s %s", inFa->id, gbOrgCatName(entry->orgCat)); } else { assert(version != entry->selectVer); if (gbVerbose >= 3) gbVerbPr(3, "skip %s, wrong version %s != %d", gbOrgCatName(entry->orgCat), inFa->id, entry->selectVer); } } else { if (gbVerbose >= 3) gbVerbPr(3, "skip %s, no entry", inFa->id); } return TRUE; }