static void processOi(struct gbSelect* select, struct estOrientInfo* oi) /* process the next OI from an update OI file, possibly outputing * the alignment record */ { char acc[GB_ACC_BUFSZ]; short version = gbSplitAccVer(oi->name, acc); /* will return NULL on ignored sequences */ struct gbEntry* entry = gbReleaseFindEntry(select->release, acc); if ((entry != NULL) && (version == entry->selectVer)) { /* selected */ if (!gInclVersion) strcpy(oi->name, acc); /* remove version */ estOrientInfoTabOut(oi, gOutOi); entry->clientFlags = TRUE; /* flag so we know we got it */ } /* trace if enabled */ if (gbVerbose >= 3) { if (entry == NULL) gbVerbPr(3, "no entry: %s.%d", acc, version); else if (entry->selectVer <= 0) gbVerbPr(3, "not selected: %s.%d", acc, version); else if (version != entry->selectVer) gbVerbPr(3, "not version: %s.%d != %d", acc, version, entry->selectVer); else gbVerbPr(3, "save: %s.%d", acc, version); } }
static void checkNewEntry(struct gbSelect* select, struct gbStatusTbl* statusTbl, struct gbEntry* entry) /* check if an entry is new */ { if (entry->selectVer == NULL_VERSION) { /* new entry, get the alignment. However if the processed directory * has not been aligned yet, it might not exist, in which case, it's * ignored.*/ struct gbAligned* aligned = NULL; struct gbProcessed* processed = getProcAligned(entry, &aligned); if (!loadNonCoding && (processed != NULL) && (processed->molType != mol_mRNA)) gbVerbPr(5, "nonCoding: %s.%d %s", entry->acc, entry->processed->version, gbMolTypeSym(processed->molType)); else if (aligned != NULL) { struct gbStatus* status = gbStatusTblAdd(statusTbl, entry->acc, aligned->version, processed->modDate, entry->type, select->release->srcDb, entry->orgCat, 0, 0, aligned->update->release->version, aligned->update->shortName, 0); markNew(statusTbl, status, processed, aligned); } else if (gbVerbose >= 5) { gbVerbPr(5, "notAligned: %s.%d %s", entry->acc, entry->processed->version, gbFormatDate(entry->processed->modDate)); } } }
boolean copyFastaRec(struct gbSelect* select, struct gbFa* inFa, struct outFa* nativeFa, struct outFa* xenoFa) /* Read and copy a record to one of the output files, if selected */ { char acc[GB_ACC_BUFSZ]; unsigned version; struct gbEntry* entry; if (!gbFaReadNext(inFa)) return FALSE; /* EOF */ version = gbSplitAccVer(inFa->id, acc); entry = gbReleaseFindEntry(select->release, acc); if (entry != NULL) { char* seq = gbFaGetSeq(inFa); if (strlen(seq) < MIN_SEQ_SIZE) { if (gbVerbose >= 3) gbVerbPr(3, "skip %s, less than minimum sequence size", inFa->id); } else if ((version == entry->selectVer) && (entry->clientFlags & ALIGN_FLAG)) { outFaWrite(((entry->orgCat == GB_NATIVE) ? nativeFa : xenoFa), inFa); if (gbVerbose >= 3) gbVerbPr(3, "aligning %s %s", inFa->id, gbOrgCatName(entry->orgCat)); } else if ((version == entry->selectVer) && (entry->clientFlags & MIGRATE_FLAG)) { if (gbVerbose >= 3) gbVerbPr(3, "migrating %s %s", inFa->id, gbOrgCatName(entry->orgCat)); } else { assert(version != entry->selectVer); if (gbVerbose >= 3) gbVerbPr(3, "skip %s, wrong version %s != %d", gbOrgCatName(entry->orgCat), inFa->id, entry->selectVer); } } else { if (gbVerbose >= 3) gbVerbPr(3, "skip %s, no entry", inFa->id); } return TRUE; }
void copyPsl(struct gbSelect* select, unsigned pslFileType, struct psl* psl, char* inPsl, FILE* outPslFh, struct gbEntryCnts* counts) /* Copy a PSL. */ { char acc[GB_ACC_BUFSZ]; int version = gbSplitAccVer(psl->qName, acc); struct gbAligned* aligned; struct gbEntry* entry = getEntry(select, acc, inPsl); if (entry == NULL) errAbort("no entry for %s %s in %s", gPslFileExt[pslFileType], psl->qName, inPsl); aligned = gbEntryGetAligned(entry, select->update, version, NULL); pslTabOut(psl, outPslFh); if (pslFileType == MAIN_PSL_FILE) { /* count main psls in index. */ gbAlignedCount(aligned, 1); /* increment accession count if this is * the first one */ gbCountNeedAligned(counts, entry, ((aligned->numAligns == 1) ? 1 : 0), 1); } else { /* for rawPsl and intronPsl only count PSLs */ gbCountNeedAligned(counts, entry, 0, 1); } if (gbVerbose >= 3) gbVerbPr(3, "installing %s %s %s.%d", gbOrgCatName(entry->orgCat), gPslFileExt[pslFileType], acc, version); }
void migratePsl(struct migrateAligns* migrate, unsigned pslFileType, struct gbEntryCnts* counts, struct psl* psl, char* inPsl, FILE* outPslFh) /* Migrate PSL, if it's accession and version are flagged */ { struct gbAligned* aligned = getMigrateAligned(migrate, psl->qName, inPsl); if (aligned != NULL) { pslTabOut(psl, outPslFh); if (pslFileType == MAIN_PSL_FILE) { /* count main psls in index. */ gbAlignedCount(aligned, 1); /* increment accession count if this is the first one */ gbCountNeedAligned(counts, aligned->entry, ((aligned->numAligns == 1) ? 1 : 0), 1); } else { /* for rawPsl and intronPsl only count PSLs */ gbCountNeedAligned(counts, aligned->entry, 0, 1); } if (gbVerbose >= 3) gbVerbPr(3, "migrating %s %s %s", gbOrgCatName(aligned->entry->orgCat), gPslFileExt[pslFileType], psl->qName); } }
static void endStep(struct gbIndex* index, struct stepInfo* info)
/* Print the end-of-step message: the step name, the difference between the
 * current number of entries and info->startNumEntries, and the current
 * number of entries. */
{
    int total = getNumEntries(index);

    gbVerbPr(0, "end %s: acc-added=%d, acc-total=%d ", info->step,
             (total - info->startNumEntries), total);
}
static void processSeq(struct gbSelect* select, struct gbFa* inFa) /* process the next sequence from an update fasta file, possibly outputing * the sequence */ { char acc[GB_ACC_BUFSZ], hdrBuf[GB_ACC_BUFSZ], *hdr = NULL; short version = gbSplitAccVer(inFa->id, acc); /* will return NULL on ignored sequences */ struct gbEntry* entry = gbReleaseFindEntry(select->release, acc); if ((entry != NULL) && (version == entry->selectVer) && !entry->clientFlags) { /* selected, output if it appears valid */ if (isValidMrnaSeq(inFa)) { if (!gInclVersion) { /* put version in comment */ safef(hdrBuf, sizeof(hdrBuf), "%s %d", acc, version); hdr = hdrBuf; } gbFaWriteFromFa(gOutFa, inFa, hdr); entry->clientFlags = TRUE; /* flag so only gotten once */ } else { fprintf(stderr, "warning: %s does not appear to be a valid mRNA sequence, skipped: %s:%d\n", inFa->id, inFa->fileName, inFa->recLineNum); } } /* trace if enabled */ if (gbVerbose >= 3) { if (entry == NULL) gbVerbPr(3, "no entry: %s.%d", acc, version); else if (entry->selectVer <= 0) gbVerbPr(3, "not selected: %s.%d", acc, version); else if (version != entry->selectVer) gbVerbPr(3, "not version: %s.%d != %d", acc, version, entry->selectVer); else gbVerbPr(3, "save: %s.%d", acc, version); } }
static void refPepDropOne(struct sqlConnection *conn, struct brokenRefPep *brp,
                          struct sqlDeleter* seqTblDeleter, boolean dryRun)
/* Drop a refPep: log the drop and, unless this is a dry run, add the protein
 * accession to the sequence table deleter. */
{
    gbVerbPr(2, "%s\t%s\tdrop", sqlGetDatabase(conn), brp->protAcc);
    if (dryRun)
        return;  /* report only */
    sqlDeleterAddAcc(seqTblDeleter, brp->protAcc);
}
static void flagNeedAligned(struct gbSelect* select, struct gbSelect* prevSelect, struct gbProcessed* processed, struct gbAlignInfo* alignInfo) /* Function called for each sequence to set alignment and migrate flags. The * migrate flag is set in the previous and curent entries, the align flag set * in only in current ones. */ { struct gbAligned* prevAligned = NULL; if (prevSelect != NULL) prevAligned = findPrevAligned(prevSelect, processed); /* Migrate if same acc is aligned in the previous release and passed other * checks, otherwise mark the entry for alignment. */ if ((prevAligned != NULL) && canMigrate(processed, prevAligned)) { struct gbEntry* prevEntry = prevAligned->entry; prevEntry->selectVer = prevAligned->version; prevEntry->clientFlags |= MIGRATE_FLAG; processed->entry->clientFlags |= MIGRATE_FLAG; prevAligned->update->selectAlign |= prevEntry->orgCat; gbCountNeedAligned(&alignInfo->migrate, prevEntry, 1, prevAligned->numAligns); if (gbVerbose >= 3) gbVerbPr(3, "migrate %s %s.%d %d psls", gbOrgCatName(prevEntry->orgCat), prevEntry->acc, prevAligned->version, prevAligned->numAligns); } else { struct gbEntry* entry = processed->entry; entry->selectVer = processed->version; entry->clientFlags |= ALIGN_FLAG; processed->update->selectProc |= entry->orgCat; gbCountNeedAligned(&alignInfo->align, entry, 1, 0); if (gbVerbose >= 3) gbVerbPr(3, "align %s %s.%d", gbOrgCatName(entry->orgCat), entry->acc, processed->version); } }
void migrateOrientInfo(struct migrateAligns* migrate, struct estOrientInfo* oi, char* inOi, FILE* outOiFh) /* Migrate a estOrientInfo row, if it's accession and version are flagged */ { struct gbAligned* aligned = getMigrateAligned(migrate, oi->name, inOi); if (aligned != NULL) { if (gbVerbose >= 3) gbVerbPr(3, "migrating %s oi %s", gbOrgCatName(aligned->entry->orgCat), oi->name); estOrientInfoTabOut(oi, outOiFh); /* just count records */ gbCountNeedAligned(&migrate->counts.oiCnts, aligned->entry, 0, 1); } }
void copyIntronPsl(struct gbSelect* select, struct psl* psl, char* inPsl, FILE* outPslFh, struct recCounts* recCounts) /* Copy an intronPsl. */ { char acc[GB_ACC_BUFSZ]; struct gbEntry* entry; gbSplitAccVer(psl->qName, acc); entry = getEntry(select, acc, inPsl); if (entry != NULL) { if (gbVerbose >= 3) gbVerbPr(3, "installing %s intronPsl %s", gbOrgCatName(entry->orgCat), psl->qName); pslTabOut(psl, outPslFh); /* just count records */ gbCountNeedAligned(&recCounts->intronPslCnts, entry, 0, 1); } }
void copyOrientInfo(struct gbSelect* select, struct estOrientInfo* oi, char* inOi, FILE* outOiFh, struct recCounts* recCounts) /* Copy a orientInfo record. */ { char acc[GB_ACC_BUFSZ]; struct gbEntry* entry; gbSplitAccVer(oi->name, acc); entry = getEntry(select, acc, inOi); if (entry != NULL) { if (gbVerbose >= 3) gbVerbPr(3, "installing %s oi %s", gbOrgCatName(entry->orgCat), oi->name); estOrientInfoTabOut(oi, outOiFh); /* just count records */ gbCountNeedAligned(&recCounts->oiCnts, entry, 0, 1); } }
static void raInfoAdd(struct raInfoTbl *rit, struct hash *raRec, char *acc, short ver, char *seqSzFld, char *offFld, char *recSzFld, unsigned extFileId) /* add a ra mrna or pep */ { struct hashEl *hel; struct raInfo *ri; char accVer[GB_ACC_BUFSZ]; if (extFileId == 0) errAbort("no extFileId for %s.%d", acc, ver); gbVerbPr(10, "raAdd %s.%d ext %d", acc, ver, extFileId); lmAllocVar(rit->accMap->lm, ri); safef(accVer, sizeof(accVer), "%s.%d", acc, ver); hel = hashAdd(rit->accMap, accVer, ri); ri->acc = lmCloneString(rit->accMap->lm, acc); ri->version = ver; ri->size = sqlUnsigned((char*)hashMustFindVal(raRec, seqSzFld)); ri->offset = sqlLongLong((char*)hashMustFindVal(raRec, offFld)); ri->fileSize = sqlUnsigned((char*)hashMustFindVal(raRec, recSzFld)); ri->extFileId = extFileId; }