void gbAlignDataDeleteOutdated(char *db, struct sqlConnection *conn, struct gbSelect* select, struct gbStatusTbl* statusTbl, struct dbLoadOptions* options, char *tmpDir) /* delete outdated alignment data */ { struct sqlDeleter* deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4)); struct gbStatus* status; /* delete seqChg, deleted, and orphans from alignments; clearing count of * number aligned. */ for (status = statusTbl->deleteList; status != NULL; status = status->next) { sqlDeleterAddAcc(deleter, status->acc); status->numAligns = 0; } for (status = statusTbl->seqChgList; status != NULL; status = status->next) { sqlDeleterAddAcc(deleter, status->acc); status->numAligns = 0; } for (status = statusTbl->orphanList; status != NULL; status = status->next) { sqlDeleterAddAcc(deleter, status->acc); status->numAligns = 0; } gbAlignDataDeleteFromTables(db, conn, select->release->srcDb, select->type, deleter, options); sqlDeleterFree(&deleter); }
static void refSeqPepClean(struct sqlConnection *conn) /* Delete all refseq peptides that are in gbSeq but no longer * referenced by refLink. */ { char query[1024]; struct sqlResult* sr; char **row; struct sqlDeleter* deleter; /* don't do anything if we don't have the refLink table. This can * happen if refSeq was enabled after the initial load */ if (!sqlTableExists(conn, "refLink")) return; deleter = sqlDeleterNew(gTmpDir, (gbVerbose >= 4)); /* Use a join to get list of acc, which proved reasonable fastly because * the the list is small */ sqlSafef(query, sizeof(query), "SELECT acc FROM gbSeq LEFT JOIN refLink ON (refLink.protAcc = gbSeq.acc) " "WHERE (acc LIKE 'NP_%%') AND (refLink.protAcc IS NULL)"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) sqlDeleterAddAcc(deleter, row[0]); sqlFreeResult(&sr); sqlDeleterDel(deleter, conn, SEQ_TBL, "acc"); sqlDeleterFree(&deleter); }
struct sqlDeleter *gbBuildStateReloadDeleter(struct sqlConnection *conn, struct gbSelect* select, char *tmpDirPath) /* get deleter for list of accessions to reload for the selected categories. * Used when reloading. Returns null if none found */ { struct hash* seqTblAccs; struct slName *gbStatAccs, *acc; struct hashCookie cookie; struct hashEl *hel; struct sqlDeleter* deleter; unsigned cnt = 0; unsigned statTblSelect = select->release->srcDb | select->orgCats | select->type; /* ESTs not implemented, which gets rid of complexities of accPrefix */ if (select->type & GB_EST) errAbort("gbBuildStateReloadDeleter doesn't handle ESTs"); seqTblAccs = seqTblLoadAcc(conn, select); gbStatAccs = gbStatusTblLoadAcc(conn, statTblSelect, NULL); /* build up deleter combining the two */ deleter = sqlDeleterNew(tmpDirPath, (gbVerbose >= 4)); cookie = hashFirst(seqTblAccs); while ((hel = hashNext(&cookie)) != NULL) { sqlDeleterAddAcc(deleter, hel->name); cnt++; } for (acc = gbStatAccs; acc != NULL; acc = acc->next) { if (hashLookup(seqTblAccs, acc->name) == NULL) { sqlDeleterAddAcc(deleter, acc->name); cnt++; } } #ifdef DUMP_HASH_STATS hashPrintStats(seqTblAccs, "seqTblAccs", stderr); #endif hashFree(&seqTblAccs); slFreeList(&gbStatAccs); if (cnt == 0) sqlDeleterFree(&deleter); return deleter; }
static void makeRepairs(struct brokenRefPepTbl *brpTbl,
                        struct sqlConnection *conn,
                        struct extFileTbl* extFileTbl,
                        boolean dryRun)
/* Make repairs once data is collected.  Each entry in brpTbl->protAccHash
 * is either repaired (when it has an mRNA accession and a non-negative
 * newFaOff) or dropped.  When dryRun is set, only the counts of what would
 * have been done are reported; otherwise the seqTbl updates are committed
 * and the dropped accessions are deleted from SEQ_TBL. */
{
static char *tmpDir = "/var/tmp";
struct hashCookie cookie;
struct hashEl *hel;
int repairCnt = 0;
int dropCnt = 0;
struct seqTbl* seqTbl = seqTblNew(conn, tmpDir, (gbVerbose > 3));
struct sqlDeleter* seqTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose > 3));

cookie = hashFirst(brpTbl->protAccHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct brokenRefPep *brp = hel->val;
    if ((brp->mrnaAcc != NULL) && (brp->newFaOff >= 0))
        {
        refPepRepairOne(conn, brp, seqTbl, extFileTbl, dryRun);
        repairCnt++;
        }
    else
        {
        refPepDropOne(conn, brp, seqTblDeleter, dryRun);
        dropCnt++;
        }
    }

if (dryRun)
    {
    gbVerbMsg(1, "%s: would have repaired %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), repairCnt);
    gbVerbMsg(1, "%s: would have dropped %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), dropCnt);
    }
else
    {
    seqTblCommit(seqTbl, conn);
    gbVerbMsg(1, "%s: repaired %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), repairCnt);
    sqlDeleterDel(seqTblDeleter, conn, SEQ_TBL, "acc");
    gbVerbMsg(1, "%s: dropped %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), dropCnt);
    }

/* The deleter was previously leaked on both the dry-run and real paths. */
sqlDeleterFree(&seqTblDeleter);
/* NOTE(review): seqTbl from seqTblNew() is also never released here; its
 * matching free routine is not visible in this file section -- confirm and
 * add the call. */
}
static struct sqlDeleter* buildIgnoredDeleters(struct sqlConnection *conn,
                                               struct gbRelease* release,
                                               boolean force, char* workDir)
/* Construct a deleter object holding the ignored accessions that are in
 * gbStatus (or all ignored accessions when force is set).  The deleter is
 * created lazily under "<workDir>/ignore"; returns NULL if no accession
 * qualified. */
{
char ignoreTmpDir[PATH_LEN];
struct hashCookie iter;
struct hashEl *el;
struct sqlDeleter *ignoredDeleter = NULL;

/* Need to force load of ignore table, as release might not be initialized yet */
gbReleaseLoadIgnore(release);

safef(ignoreTmpDir, sizeof(ignoreTmpDir), "%s/ignore", workDir);

/* walk each hash bucket's chain of ignore entries */
iter = hashFirst(release->ignore->accHash);
for (el = hashNext(&iter); el != NULL; el = hashNext(&iter))
    {
    struct gbIgnoreAcc *ignored;
    for (ignored = el->val; ignored != NULL; ignored = ignored->next)
        {
        /* unless forced, only entries present in gbStatus are deleted */
        if (!force && !inGbStatusTable(conn, ignored->acc, ignored->modDate))
            continue;
        if (ignoredDeleter == NULL)
            ignoredDeleter = sqlDeleterNew(ignoreTmpDir, (gbVerbose >= 4));
        sqlDeleterAddAcc(ignoredDeleter, ignored->acc);
        gbVerbMsg(4, "%s %s ignored, will delete", ignored->acc,
                  gbFormatDate(ignored->modDate));
        }
    }
return ignoredDeleter;
}
static struct sqlDeleter* buildReloadDeleter(char *reloadList, unsigned srcDb, char *tmpDir)
/* Read the reload list file, building a deleter for the accessions that
 * gbGuessSrcDb() assigns to the specified source DB.  The deleter is created
 * on the first match; NULL is returned when nothing matched. */
{
struct lineFile *reloadLf = gzLineFileOpen(reloadList);
struct sqlDeleter *reloadDeleter = NULL;
char *fields[1];
int numQueued = 0;

while (lineFileChopNext(reloadLf, fields, ArraySize(fields)))
    {
    char *acc = trimSpaces(fields[0]);
    /* skip accessions belonging to a different source database */
    if (gbGuessSrcDb(acc) != srcDb)
        continue;
    if (reloadDeleter == NULL)
        reloadDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
    sqlDeleterAddAcc(reloadDeleter, acc);
    numQueued++;
    gbVerbMsg(5, "%s delete for reloading", acc);
    }
gzLineFileClose(&reloadLf);

gbVerbMsg(1, "delete %d entries for reloading", numQueued);
return reloadDeleter;
}
void gbMetaDataDeleteOutdated(struct sqlConnection *conn, struct gbSelect* select, struct gbStatusTbl* statusTbl, struct dbLoadOptions* options, char *tmpDir) /* Delete outdated metadata. Also delete genePred table entries for genes * where metadata changed but sequence has not. These will have the genePred * records reloaded.*/ { setGeneTblFlags(conn, options); struct sqlDeleter* deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4)); struct sqlDeleter* geneTblDeleter = NULL; struct sqlDeleter* derivedTblDeleter = NULL; if (partitionMayHaveGeneTbls(select)) geneTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4)); if (select->type == GB_MRNA) derivedTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4)); struct gbStatus* status; gSrcDb = select->release->srcDb; gOptions = options; strcpy(gTmpDir, tmpDir); /* Delete any meta modified from id tables */ for (status = statusTbl->seqChgList; status != NULL; status = status->next) { if (status->stateChg & GB_META_CHG) sqlDeleterAddAcc(deleter, status->acc); } for (status = statusTbl->metaChgList; status != NULL; status = status->next) { assert(!(status->stateChg&GB_SEQ_CHG)); if (status->stateChg&GB_META_CHG) sqlDeleterAddAcc(deleter, status->acc); else if (status->stateChg&GB_REBUILD_DERIVED) sqlDeleterAddAcc(derivedTblDeleter, status->acc); if (geneTblDeleter != NULL) { // need to just try, since we can set the status->isMgcFull // flag until we are reading the ra. sqlDeleterAddAcc(geneTblDeleter, status->acc); } } for (status = statusTbl->deleteList; status != NULL; status = status->next) sqlDeleterAddAcc(deleter, status->acc); for (status = statusTbl->orphanList; status != NULL; status = status->next) sqlDeleterAddAcc(deleter, status->acc); gbMetaDataDeleteFromIdTables(conn, options, deleter); sqlDeleterFree(&deleter); /* remove deleted and orphans from metadata. 
*/ deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4)); for (status = statusTbl->deleteList; status != NULL; status = status->next) sqlDeleterAddAcc(deleter, status->acc); for (status = statusTbl->orphanList; status != NULL; status = status->next) sqlDeleterAddAcc(deleter, status->acc); // must do gene tbls before other tables if (geneTblDeleter != NULL) deleteFromGeneTbls(conn, select, geneTblDeleter); gbMetaDataDeleteFromTables(conn, options, select->release->srcDb, deleter); sqlDeleterFree(&deleter); sqlDeleterFree(&geneTblDeleter); sqlDeleterFree(&derivedTblDeleter); /* If we are cleaning up the ext table, we need to get rid of any * refseq peptides in gbSeq that are no longer referenced. We don't * do it other times as these are not reachable directly. */ if ((select->release->srcDb == GB_REFSEQ) && (gOptions->flags & DBLOAD_EXT_FILE_UPDATE)) refSeqPepClean(conn); }