static void processOi(struct gbSelect* select, struct estOrientInfo* oi) /* process the next OI from an update OI file, possibly outputing * the alignment record */ { char acc[GB_ACC_BUFSZ]; short version = gbSplitAccVer(oi->name, acc); /* will return NULL on ignored sequences */ struct gbEntry* entry = gbReleaseFindEntry(select->release, acc); if ((entry != NULL) && (version == entry->selectVer)) { /* selected */ if (!gInclVersion) strcpy(oi->name, acc); /* remove version */ estOrientInfoTabOut(oi, gOutOi); entry->clientFlags = TRUE; /* flag so we know we got it */ } /* trace if enabled */ if (gbVerbose >= 3) { if (entry == NULL) gbVerbPr(3, "no entry: %s.%d", acc, version); else if (entry->selectVer <= 0) gbVerbPr(3, "not selected: %s.%d", acc, version); else if (version != entry->selectVer) gbVerbPr(3, "not version: %s.%d != %d", acc, version, entry->selectVer); else gbVerbPr(3, "save: %s.%d", acc, version); } }
struct gbEntry* getEntry(struct gbSelect* select, char* acc, char* refFile)
/* Look up the entry for an accession that was referenced in a file; refFile
 * is only used in the error message.
 *
 * Returns the entry when it exists in select->release and has a processed
 * record for select->update.  Returns NULL when the accession is missing from
 * the release but is listed in the release's ignore table, or when the entry
 * exists but is not part of the current update.  A missing accession that is
 * not in the ignore table is reported through errAbort().  This lets an
 * accession be added to the ignore list after alignment, and copes with an
 * ignored accession turning up in a PSL, etc. */
{
    struct gbEntry* found = gbReleaseFindEntry(select->release, acc);

    if (found == NULL) {
        /* Note: this does not test whether a specific acc/moddate pair is
         * ignored; it only suppresses the error when the accession is
         * ignored for any moddate. */
        if (gbIgnoreFind(select->release->ignore, acc) != NULL) {
            return NULL;  /* ignored, assume the best */
        }
        errAbort("can't find accession \"%s\" in gbIndex, referenced in %s", acc, refFile);
    }

    /* The entry must also belong to the update being processed. */
    assert(select->update != NULL);
    if (gbEntryFindUpdateProcessed(found, select->update) != NULL) {
        return found;
    }

    /* Not in this update; the only expected reason is that it is ignored. */
    assert(gbIgnoreFind(select->release->ignore, acc) != NULL);
    return NULL;
}
void copyRefSeqPepFa(struct gbUpdate* update, char* outDir, char *gbFile) /* copy a subset of the RefSeq peptide file for the select genes */ { struct gbRelease* release = update->release; char faInPath[PATH_LEN]; char faOutPath[PATH_LEN]; struct lineFile* inLf; boolean copying = FALSE; FILE* outFh; char* line; /* change the .gbff.Z suffix to .fsa.Z */ if (!endsWith(gbFile, ".gbff.Z")) errAbort("expected a file ending in .gbff.Z, got: %s", gbFile); strcpy(faInPath, gbFile); faInPath[strlen(faInPath)-7] = '\0'; strcat(faInPath, ".fsa.Z"); strcpy(faOutPath, outDir); strcat(faOutPath, "/"); strcat(faOutPath, faInPath); verbose(1, "copying from %s\n", faInPath); /* copy selected, don't bother with fa readers */ inLf = gzLineFileOpen(faInPath); outFh = gbMustOpenOutput(faOutPath); while (lineFileNext(inLf, &line, NULL)) { if (line[0] == '>') { char *geneAcc = parsePepGeneAcc(line); struct gbEntry* entry = NULL; if (geneAcc != NULL) entry = gbReleaseFindEntry(release, geneAcc); copying = ((entry != NULL) && (entry->selectVer > 0)); verbose(2, "acc for pep: %s: %s\n", geneAcc, (copying ? "yes" : "no")); } if (copying) { fputs(line, outFh); fputc('\n', outFh); if (ferror(outFh)) errnoAbort("write failed: %s: ", faOutPath); } } gbOutputRename(faOutPath, &outFh); gzLineFileClose(&inLf); }
boolean copyFastaRec(struct gbSelect* select, struct gbFa* inFa, struct outFa* nativeFa, struct outFa* xenoFa) /* Read and copy a record to one of the output files, if selected */ { char acc[GB_ACC_BUFSZ]; unsigned version; struct gbEntry* entry; if (!gbFaReadNext(inFa)) return FALSE; /* EOF */ version = gbSplitAccVer(inFa->id, acc); entry = gbReleaseFindEntry(select->release, acc); if (entry != NULL) { char* seq = gbFaGetSeq(inFa); if (strlen(seq) < MIN_SEQ_SIZE) { if (gbVerbose >= 3) gbVerbPr(3, "skip %s, less than minimum sequence size", inFa->id); } else if ((version == entry->selectVer) && (entry->clientFlags & ALIGN_FLAG)) { outFaWrite(((entry->orgCat == GB_NATIVE) ? nativeFa : xenoFa), inFa); if (gbVerbose >= 3) gbVerbPr(3, "aligning %s %s", inFa->id, gbOrgCatName(entry->orgCat)); } else if ((version == entry->selectVer) && (entry->clientFlags & MIGRATE_FLAG)) { if (gbVerbose >= 3) gbVerbPr(3, "migrating %s %s", inFa->id, gbOrgCatName(entry->orgCat)); } else { assert(version != entry->selectVer); if (gbVerbose >= 3) gbVerbPr(3, "skip %s, wrong version %s != %d", gbOrgCatName(entry->orgCat), inFa->id, entry->selectVer); } } else { if (gbVerbose >= 3) gbVerbPr(3, "skip %s, no entry", inFa->id); } return TRUE; }
void checkOrgCat(struct gbEntry* entry, struct gbSelect* prevSelect)
/* Check whether the organism category changed for an entry of an organism
 * being managed.  The entry is first checked against its own processed list
 * starting at entry->processed->next, and then, when prevSelect is given and
 * the accession exists in prevSelect->release, against that previous entry's
 * whole processed list.  The `reported` list is shared by both checkOrgCats()
 * calls and freed before returning. */
{
    struct slTime* alreadyReported = NULL;
    struct gbEntry* prior;

    /* within the current release */
    checkOrgCats(entry, entry->processed->next, &alreadyReported);

    /* against the previous release, if there is one */
    prior = (prevSelect != NULL)
        ? gbReleaseFindEntry(prevSelect->release, entry->acc)
        : NULL;
    if (prior != NULL) {
        checkOrgCats(entry, prior->processed, &alreadyReported);
    }

    slFreeList(&alreadyReported);
}
void getRequestedAccs(char* accFile, struct gbRelease* release, struct hash* accTbl)
/* Mark for extraction the accessions listed in accFile.  Each name loaded by
 * loadAccList() is looked up in release; for those that have an entry,
 * selectAcc() is called with the entry's first processed record and accTbl.
 * Names without an entry are skipped silently. */
{
    struct slName* names = loadAccList(accFile);
    struct slName* cur = names;

    while (cur != NULL) {
        struct gbEntry* found = gbReleaseFindEntry(release, cur->name);
        if (found != NULL) {
            selectAcc(found->processed, accTbl, NULL);
        }
        cur = cur->next;
    }

    slFreeList(&names);
}
struct gbAligned* findPrevAligned(struct gbSelect* prevSelect, struct gbProcessed* processed) /* Check to see if a accession is in the prevAligned. If the organism * category of the alignment doesn't match the entry, it will be ignored. * This can happen if organism aliases are added or the organism name changed. */ { struct gbAligned* prevAligned = NULL; /* default if no previous */ struct gbEntry* prevEntry = gbReleaseFindEntry(prevSelect->release, processed->entry->acc); if (prevEntry != NULL) prevAligned = gbEntryFindAlignedVer(prevEntry, processed->version); if ((prevAligned != NULL) && (prevAligned->alignOrgCat != processed->entry->orgCat)) return NULL; /* ignore due to category change */ else return prevAligned; }
static void processSeq(struct gbSelect* select, struct gbFa* inFa) /* process the next sequence from an update fasta file, possibly outputing * the sequence */ { char acc[GB_ACC_BUFSZ], hdrBuf[GB_ACC_BUFSZ], *hdr = NULL; short version = gbSplitAccVer(inFa->id, acc); /* will return NULL on ignored sequences */ struct gbEntry* entry = gbReleaseFindEntry(select->release, acc); if ((entry != NULL) && (version == entry->selectVer) && !entry->clientFlags) { /* selected, output if it appears valid */ if (isValidMrnaSeq(inFa)) { if (!gInclVersion) { /* put version in comment */ safef(hdrBuf, sizeof(hdrBuf), "%s %d", acc, version); hdr = hdrBuf; } gbFaWriteFromFa(gOutFa, inFa, hdr); entry->clientFlags = TRUE; /* flag so only gotten once */ } else { fprintf(stderr, "warning: %s does not appear to be a valid mRNA sequence, skipped: %s:%d\n", inFa->id, inFa->fileName, inFa->recLineNum); } } /* trace if enabled */ if (gbVerbose >= 3) { if (entry == NULL) gbVerbPr(3, "no entry: %s.%d", acc, version); else if (entry->selectVer <= 0) gbVerbPr(3, "not selected: %s.%d", acc, version); else if (version != entry->selectVer) gbVerbPr(3, "not version: %s.%d != %d", acc, version, entry->selectVer); else gbVerbPr(3, "save: %s.%d", acc, version); } }
void checkEst(struct gbRelease* mrnaRelease, struct gbEntry* entry, struct gbSelect* prevSelect) /* Check an EST, check for type change and orgCat change for * any of genomes in use */ { struct gbEntry* mrnaEntry = gbReleaseFindEntry(mrnaRelease, entry->acc); if (mrnaEntry != NULL) { /* type changed, output in format for ignore.idx */ if (mrnaEntry->processed->modDate > entry->processed->modDate) gbError("%s\t%s\t%s\t%s changes type EST to mRNA", mrnaEntry->acc, gbFormatDate(entry->processed->modDate), gbSrcDbName(mrnaRelease->srcDb), gbFormatDate(mrnaEntry->processed->modDate)); else gbError("%s\t%s\t%s\t%s changes type mRNA to EST", mrnaEntry->acc, gbFormatDate(mrnaEntry->processed->modDate), gbSrcDbName(mrnaRelease->srcDb), gbFormatDate(entry->processed->modDate)); } checkOrgCat(entry, prevSelect); }
static void selectStatus(struct gbStatusTbl* statusTbl, struct gbStatus* tmpStatus, void* clientData) /* Function called to determine if a status entry should be loaded. This * compares the status parsed from the gbStatus file with the gbIndex. * Unchanged entries are not loaded into the table, decresing memory required * for incremental loads. */ { struct selectStatusData* ssData = clientData; struct gbEntry* entry = gbReleaseFindEntry(ssData->select->release, tmpStatus->acc); struct gbProcessed* processed = NULL; struct gbAligned* aligned = NULL; struct hashEl* seqAccEl = hashLookup(ssData->seqHash, tmpStatus->acc); /* check if in seq table, record if found */ if (seqAccEl == NULL) { fprintf(stderr, "Error: %s is in gbStatus but not in gbSeq table\n", tmpStatus->acc); gErrorCnt++; } else seqAccEl->val = (void*)TRUE; if (entry != NULL) processed = getProcAligned(entry, &aligned); /* if no entry or not aligned, or if it shouldn't be included, delete */ if ((entry == NULL) || (aligned == NULL)) markDeleted(statusTbl, tmpStatus, ssData); else if (!loadNonCoding && (processed->molType != mol_mRNA)) markIgnore(statusTbl, tmpStatus, entry); else { /* validate entries are not going backwards */ if (aligned->version < tmpStatus->version) errAbort("version for %s in release (%d) is less than one in database (%d)", entry->acc, aligned->version, tmpStatus->version); if (processed->modDate < tmpStatus->modDate) { fprintf(stderr, "Warning: modDate for %s in release (%s) is before one in database (%s)\n", entry->acc, gbFormatDate(processed->modDate), gbFormatDate(tmpStatus->modDate)); } /* flag updates for changed for latter processing, order of checks is * very important.*/ if ((aligned->version > tmpStatus->version) || (aligned->numAligns != tmpStatus->numAligns)) markSeqChanged(statusTbl, tmpStatus, processed, aligned); else if (processed->modDate != tmpStatus->modDate) markMetaChanged(ssData->select, statusTbl, tmpStatus, processed, aligned); else if 
(statusTbl->extFileUpdate && !sameString(tmpStatus->extRelease, ssData->select->release->version)) markExtChanged(statusTbl, tmpStatus, processed, aligned); else if ((gOptions->flags & DBLOAD_REBUILD_DERIVED) && (entry->type == GB_MRNA)) markRebuildDerived(statusTbl, tmpStatus, processed, aligned); else markNoChange(statusTbl, tmpStatus, entry); } }