static bool checkForAccTypeChange(struct sqlConnection *conn, struct gbSelect* select, struct gbStatus* status) /* Check if a sequence that appears new has really had it's type has changed. * Returns true if type changed (or other error), false if nothing detected. */ { char query[128]; struct sqlResult* sr; char **row; bool changed = FALSE; sqlSafef(query, sizeof(query), "SELECT type FROM gbSeq WHERE acc = '%s'", status->acc); sr = sqlGetResult(conn, query); if ((sr != NULL) && ((row = sqlNextRow(sr)) != NULL)) { unsigned type = gbParseType(row[0]); if (type != status->type) fprintf(stderr, "Error: %s %s type has changed from %s to %s; add to ignore file\n", status->acc, gbFormatDate(status->modDate), gbFmtSelect(type), gbFmtSelect(status->type)); else fprintf(stderr, "Error: %s %s is in the seq table, but shouldn't be, don't know why\n", status->acc, gbFormatDate(status->modDate)); changed = TRUE; gErrorCnt++; } sqlFreeResult(&sr); return changed; }
static void traceSelect(char* which, struct gbStatus *status)
/* Emit one verbose-level-5 line describing a selected entry: the label in
 * which, accession.version, modification date and gbSeq id, followed by the
 * selected processed and aligned updates when they are set. */
{
gbVerbPrStart(5, "%s: %s.%d %s id=%u",
              which,
              status->acc,
              status->version,
              gbFormatDate(status->modDate),
              status->gbSeqId);

/* optional: processed update name and its modDate */
if (status->selectProc != NULL)
    {
    gbVerbPrMore(5, ", proc=%s/%s",
                 status->selectProc->update->name,
                 gbFormatDate(status->selectProc->modDate));
    }

/* optional: aligned update name and its version */
if (status->selectAlign != NULL)
    {
    gbVerbPrMore(5, ", aln=%s/%d",
                 status->selectAlign->update->name,
                 status->selectAlign->version);
    }

gbVerbPrMore(5, "\n");
}
static void chkGbStatusGbEntry(struct gbSelect* select, struct gbEntry* entry, struct metaData* md) /* check entry fields against status fields */ { /* processed entry should be the one matching the aligned update */ struct gbAligned* aligned = gbEntryFindAlignedVer(entry, md->gbsVersion); if (aligned == NULL) gbError("%s.%d: no genbank gbIndex aligned object for gbStatus", md->acc, md->gbsVersion); else { /* search for a processed entry matching this data and version */ struct gbProcessed* processed = entry->processed; while ((processed != NULL) && !((processed->modDate == md->gbsModDate) && (processed->version == md->gbsVersion))) processed = processed->next; if (processed == NULL) gbError("%s: no gbIndex processed entry for version %d, moddate %s, update %s", md->acc, md->gbsVersion, gbFormatDate(md->gbsModDate), aligned->update->name); if (aligned->numAligns != md->gbsNumAligns) gbError("%s.%d: genbank index number of alignments (%d) does not match gbStatus (%d)", md->acc, md->gbsVersion, aligned->numAligns, md->gbsNumAligns); } }
static void checkNewEntry(struct gbSelect* select, struct gbStatusTbl* statusTbl, struct gbEntry* entry) /* check if an entry is new */ { if (entry->selectVer == NULL_VERSION) { /* new entry, get the alignment. However if the processed directory * has not been aligned yet, it might not exist, in which case, it's * ignored.*/ struct gbAligned* aligned = NULL; struct gbProcessed* processed = getProcAligned(entry, &aligned); if (!loadNonCoding && (processed != NULL) && (processed->molType != mol_mRNA)) gbVerbPr(5, "nonCoding: %s.%d %s", entry->acc, entry->processed->version, gbMolTypeSym(processed->molType)); else if (aligned != NULL) { struct gbStatus* status = gbStatusTblAdd(statusTbl, entry->acc, aligned->version, processed->modDate, entry->type, select->release->srcDb, entry->orgCat, 0, 0, aligned->update->release->version, aligned->update->shortName, 0); markNew(statusTbl, status, processed, aligned); } else if (gbVerbose >= 5) { gbVerbPr(5, "notAligned: %s.%d %s", entry->acc, entry->processed->version, gbFormatDate(entry->processed->modDate)); } } }
static boolean inGbStatusTable(struct sqlConnection *conn, char* acc,
                               time_t modDate)
/* Check if the specified accession, with the given modification date, is in
 * the gbStatus table.  Returns TRUE when at least one matching row exists. */
{
char query[512];
/* Build the query with sqlSafef rather than plain safef so that the string
 * arguments substituted into the quoted '%s' fields are SQL-escaped. */
sqlSafef(query, sizeof(query),
         "SELECT count(*) FROM gbStatus WHERE (acc='%s') AND (modDate='%s')",
         acc, gbFormatDate(modDate));
return (sqlQuickNum(conn, query) > 0);
}
void checkProcOrgCat(struct gbEntry* entry, struct gbProcessed* proc0, char *org0, struct gbProcessed* proc, struct slTime** reported) /* Check for organism category changing from a give processed entry * to the latest entry. Report error if not already reported */ { char* org = gbGenomePreferedOrgName(proc->organism); /* name in static table, so can compare ptrs. NULL is returned * for organism we don't know about. change from NULL to not * NULL also a orgCat change. */ if ((org != org0) && !slTimeHave(*reported, proc->modDate)) { gbError("%s\t%s\t%s\t%s changes organism \"%s\" to \"%s\"", entry->acc, gbFormatDate(proc->modDate), gbSrcDbName(entry->processed->update->release->srcDb), gbFormatDate(proc0->modDate), proc->organism, proc0->organism); slSafeAddHead(reported, slTimeNew(proc->modDate)); } }
void checkEst(struct gbRelease* mrnaRelease, struct gbEntry* entry, struct gbSelect* prevSelect) /* Check an EST, check for type change and orgCat change for * any of genomes in use */ { struct gbEntry* mrnaEntry = gbReleaseFindEntry(mrnaRelease, entry->acc); if (mrnaEntry != NULL) { /* type changed, output in format for ignore.idx */ if (mrnaEntry->processed->modDate > entry->processed->modDate) gbError("%s\t%s\t%s\t%s changes type EST to mRNA", mrnaEntry->acc, gbFormatDate(entry->processed->modDate), gbSrcDbName(mrnaRelease->srcDb), gbFormatDate(mrnaEntry->processed->modDate)); else gbError("%s\t%s\t%s\t%s changes type mRNA to EST", mrnaEntry->acc, gbFormatDate(mrnaEntry->processed->modDate), gbSrcDbName(mrnaRelease->srcDb), gbFormatDate(entry->processed->modDate)); } checkOrgCat(entry, prevSelect); }
void gbEntryDump(struct gbEntry* entry, FILE* out, int indent) /* print a gbEntry object */ { struct gbProcessed* prNext; struct gbAligned* alNext; fprintf(out, "%*s%s: %s\n", indent, "", entry->acc, ((entry->type == GB_MRNA) ? "mRNA" : "EST")); for (prNext = entry->processed; prNext != NULL; prNext = prNext->next) fprintf(out, "%*spr: %s: %d %s \"%s\"\n", indent+2, "", prNext->update->name, prNext->version, gbFormatDate(prNext->modDate), prNext->organism); for (alNext = entry->aligned; alNext != NULL; alNext = alNext->next) fprintf(out, "%*sal: %s: %d\n", indent+2, "", alNext->update->name, alNext->version); }
static struct sqlDeleter* buildIgnoredDeleters(struct sqlConnection *conn,
                                               struct gbRelease* release,
                                               boolean force, char* workDir)
/* Build a deleter holding every ignored accession of the release that is
 * either present in gbStatus with the same modDate or, when force is set,
 * unconditionally.  The deleter is created lazily on the first match and
 * uses "<workDir>/ignore" as its directory.  Returns NULL if none. */
{
char tmpDir[PATH_LEN];
struct sqlDeleter* result = NULL;
struct hashCookie iter;
struct hashEl* bucket;

/* Need to force load of ignore table, as release might not be initialized yet */
gbReleaseLoadIgnore(release);

safef(tmpDir, sizeof(tmpDir), "%s/ignore", workDir);

/* each hash element holds a list of ignore records */
iter = hashFirst(release->ignore->accHash);
while ((bucket = hashNext(&iter)) != NULL)
    {
    struct gbIgnoreAcc* ign;
    for (ign = bucket->val; ign != NULL; ign = ign->next)
        {
        if (!force && !inGbStatusTable(conn, ign->acc, ign->modDate))
            continue;
        if (result == NULL)
            result = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
        sqlDeleterAddAcc(result, ign->acc);
        gbVerbMsg(4, "%s %s ignored, will delete", ign->acc,
                  gbFormatDate(ign->modDate));
        }
    }
return result;
}
static void gbCdnaInfoUpdate(struct gbStatus* status, struct sqlConnection *conn) /* Update the mrna table for the current entry.
 * Queues a change on the gbCdnaInfoUpd updater: when status->stateChg has
 * GB_NEW a complete tab-separated row is added; otherwise, when it has
 * GB_META_CHG, the row with id equal to status->gbSeqId is modified.
 * Nothing is done for other states.  The values written come from the ra*
 * variables and raFieldCurId(), which are defined outside this function.
 * The row layout depends on haveMol and haveGi: with haveMol both the gi and
 * mol columns are written, with only haveGi just gi, otherwise neither.
 * The conn argument is not used in this function. */ { if (status->stateChg & GB_NEW) { /* new entry: add a full row */ if (haveMol) sqlUpdaterAddRow(gbCdnaInfoUpd, "%u\t%s\t%u\t%s\t%s\t%c\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%s", status->gbSeqId, raAcc, raVersion, gbFormatDate(raModDate), ((status->type == GB_MRNA) ? "mRNA" : "EST"), raDir, raFieldCurId("src"), raFieldCurId("org"), raFieldCurId("lib"), raFieldCurId("clo"), raFieldCurId("sex"), raFieldCurId("tis"), raFieldCurId("dev"), raFieldCurId("cel"), raFieldCurId("cds"), raFieldCurId("key"), raFieldCurId("def"), raFieldCurId("gen"), raFieldCurId("pro"), raFieldCurId("aut"), raGi, raMol); else if (haveGi) sqlUpdaterAddRow(gbCdnaInfoUpd, "%u\t%s\t%u\t%s\t%s\t%c\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u", status->gbSeqId, raAcc, raVersion, gbFormatDate(raModDate), ((status->type == GB_MRNA) ? "mRNA" : "EST"), raDir, raFieldCurId("src"), raFieldCurId("org"), raFieldCurId("lib"), raFieldCurId("clo"), raFieldCurId("sex"), raFieldCurId("tis"), raFieldCurId("dev"), raFieldCurId("cel"), raFieldCurId("cds"), raFieldCurId("key"), raFieldCurId("def"), raFieldCurId("gen"), raFieldCurId("pro"), raFieldCurId("aut"), raGi); else sqlUpdaterAddRow(gbCdnaInfoUpd, "%u\t%s\t%u\t%s\t%s\t%c\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u", status->gbSeqId, raAcc, raVersion, gbFormatDate(raModDate), ((status->type == GB_MRNA) ? 
"mRNA" : "EST"), raDir, raFieldCurId("src"), raFieldCurId("org"), raFieldCurId("lib"), raFieldCurId("clo"), raFieldCurId("sex"), raFieldCurId("tis"), raFieldCurId("dev"), raFieldCurId("cel"), raFieldCurId("cds"), raFieldCurId("key"), raFieldCurId("def"), raFieldCurId("gen"), raFieldCurId("pro"), raFieldCurId("aut")); } else if (status->stateChg & GB_META_CHG) { /* metadata changed: modify the existing row selected by id; the accession and type columns are not rewritten */ if (haveMol) sqlUpdaterModRow(gbCdnaInfoUpd, 1, "version='%u', moddate='%s', direction='%c', " "source=%u, organism=%u, library=%u, mrnaClone=%u, sex=%u, " "tissue=%u, development=%u, cell=%u, cds=%u, keyword=%u, " "description=%u, geneName=%u, productName=%u, author=%u, gi=%u, mol='%s' " "WHERE id=%u", raVersion, gbFormatDate(raModDate), raDir, raFieldCurId("src"), raFieldCurId("org"), raFieldCurId("lib"), raFieldCurId("clo"), raFieldCurId("sex"), raFieldCurId("tis"), raFieldCurId("dev"), raFieldCurId("cel"), raFieldCurId("cds"), raFieldCurId("key"), raFieldCurId("def"), raFieldCurId("gen"), raFieldCurId("pro"), raFieldCurId("aut"), raGi, raMol, status->gbSeqId); else if (haveGi) sqlUpdaterModRow(gbCdnaInfoUpd, 1, "version='%u', moddate='%s', direction='%c', " "source=%u, organism=%u, library=%u, mrnaClone=%u, sex=%u, " "tissue=%u, development=%u, cell=%u, cds=%u, keyword=%u, " "description=%u, geneName=%u, productName=%u, author=%u, gi=%u " "WHERE id=%u", raVersion, gbFormatDate(raModDate), raDir, raFieldCurId("src"), raFieldCurId("org"), raFieldCurId("lib"), raFieldCurId("clo"), raFieldCurId("sex"), raFieldCurId("tis"), raFieldCurId("dev"), raFieldCurId("cel"), raFieldCurId("cds"), raFieldCurId("key"), raFieldCurId("def"), raFieldCurId("gen"), raFieldCurId("pro"), raFieldCurId("aut"), raGi, status->gbSeqId); else sqlUpdaterModRow(gbCdnaInfoUpd, 1, "version='%u', moddate='%s', direction='%c', " "source=%u, organism=%u, library=%u, mrnaClone=%u, sex=%u, " "tissue=%u, development=%u, cell=%u, cds=%u, keyword=%u, " "description=%u, geneName=%u, productName=%u, author=%u " "WHERE id=%u", raVersion, 
gbFormatDate(raModDate), raDir, raFieldCurId("src"), raFieldCurId("org"), raFieldCurId("lib"), raFieldCurId("clo"), raFieldCurId("sex"), raFieldCurId("tis"), raFieldCurId("dev"), raFieldCurId("cel"), raFieldCurId("cds"), raFieldCurId("key"), raFieldCurId("def"), raFieldCurId("gen"), raFieldCurId("pro"), raFieldCurId("aut"), status->gbSeqId); } }
static void loadGbStatusRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row, unsigned descOrgCats) /* load a row of the gbStatus table */ { struct metaData* md; int iRow = 0; boolean isOk; HGID seqId; /* columns: acc,version,modDate,type,srcDb,gbSeq,numAligns */ md = metaDataTblsGet(metaDataTbls, row[iRow++]); if (md->inGbStatus) gbError("%s: occurs multiple times in the gbStatus table", md->acc); md->inGbStatus = TRUE; md->gbsVersion = strToUnsigned(row[iRow++], md->acc, "gbStatus.version", NULL); isOk = TRUE; md->gbsModDate = gbParseChkDate(row[iRow++], &isOk); if (!isOk) gbError("%s: invalid gbStatus.moddate value: \"%s\"", md->acc, row[iRow-1]); md->gbsType = gbParseType(row[iRow++]); md->gbsSrcDb = gbParseSrcDb(row[iRow++]); md->gbsOrgCat = gbParseOrgCat(row[iRow++]); seqId = strToUnsigned(row[iRow++], md->acc, "gbStatus.gbSeq", NULL); md->gbsNumAligns = strToUnsigned(row[iRow++], md->acc, "gbStatus.numAligns", NULL); md->typeFlags |= md->gbsType; if (md->inGbCdnaInfo) { if (seqId != md->gbCdnaInfoId) gbError("%s: gbStatus.gbSeq (%d) not same gbCdnaInfo.id (%d)", md->acc, seqId, md->gbCdnaInfoId); if (md->gbsType != md->gbCdnaInfoType) gbError("%s: gbStatus.type (%s) not same as gbCdnaInfo.type (%s)", md->acc, gbFmtSelect(md->gbsType), gbFmtSelect(md->gbCdnaInfoType)); if (md->gbsSrcDb != (md->typeFlags & GB_SRC_DB_MASK)) gbError("%s: gbStatus.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", md->acc, gbFmtSelect(md->gbsSrcDb), gbFmtSelect(md->typeFlags)); if (md->gbsVersion != md->gbCdnaInfoVersion) gbError("%s: gbStatus.version (%d) not same gbCdnaInfo.version (%d)", md->acc, md->gbsVersion, md->gbCdnaInfoVersion); if ((md->gbsModDate != md->gbCdnaInfoModdate)) gbError("%s: gbStatus.modDate (%s) not same gbCdnaInfo.moddate (%s)", md->acc, gbFormatDate(md->gbsModDate), gbFormatDate(md->gbCdnaInfoModdate)); /* verify either have or don't have a description */ if (descOrgCats & md->gbsOrgCat) { if (!md->haveDesc) gbError("%s: should have 
gbCdnaInfo.description: %s", md->acc, gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb)); } else { if (md->haveDesc) gbError("%s: should not have gbCdnaInfo.description: %s", md->acc, gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb)); } } }
static void selectStatus(struct gbStatusTbl* statusTbl, struct gbStatus* tmpStatus, void* clientData) /* Function called to determine if a status entry should be loaded. This * compares the status parsed from the gbStatus file with the gbIndex. * Unchanged entries are not loaded into the table, decresing memory required * for incremental loads. */ { struct selectStatusData* ssData = clientData; struct gbEntry* entry = gbReleaseFindEntry(ssData->select->release, tmpStatus->acc); struct gbProcessed* processed = NULL; struct gbAligned* aligned = NULL; struct hashEl* seqAccEl = hashLookup(ssData->seqHash, tmpStatus->acc); /* check if in seq table, record if found */ if (seqAccEl == NULL) { fprintf(stderr, "Error: %s is in gbStatus but not in gbSeq table\n", tmpStatus->acc); gErrorCnt++; } else seqAccEl->val = (void*)TRUE; if (entry != NULL) processed = getProcAligned(entry, &aligned); /* if no entry or not aligned, or if it shouldn't be included, delete */ if ((entry == NULL) || (aligned == NULL)) markDeleted(statusTbl, tmpStatus, ssData); else if (!loadNonCoding && (processed->molType != mol_mRNA)) markIgnore(statusTbl, tmpStatus, entry); else { /* validate entries are not going backwards */ if (aligned->version < tmpStatus->version) errAbort("version for %s in release (%d) is less than one in database (%d)", entry->acc, aligned->version, tmpStatus->version); if (processed->modDate < tmpStatus->modDate) { fprintf(stderr, "Warning: modDate for %s in release (%s) is before one in database (%s)\n", entry->acc, gbFormatDate(processed->modDate), gbFormatDate(tmpStatus->modDate)); } /* flag updates for changed for latter processing, order of checks is * very important.*/ if ((aligned->version > tmpStatus->version) || (aligned->numAligns != tmpStatus->numAligns)) markSeqChanged(statusTbl, tmpStatus, processed, aligned); else if (processed->modDate != tmpStatus->modDate) markMetaChanged(ssData->select, statusTbl, tmpStatus, processed, aligned); else if 
(statusTbl->extFileUpdate && !sameString(tmpStatus->extRelease, ssData->select->release->version)) markExtChanged(statusTbl, tmpStatus, processed, aligned); else if ((gOptions->flags & DBLOAD_REBUILD_DERIVED) && (entry->type == GB_MRNA)) markRebuildDerived(statusTbl, tmpStatus, processed, aligned); else markNoChange(statusTbl, tmpStatus, entry); } }