Пример #1
0
void gbAlignDataDeleteOutdated(char *db, struct sqlConnection *conn,
                               struct gbSelect* select, 
                               struct gbStatusTbl* statusTbl,
                               struct dbLoadOptions* options,
                               char *tmpDir)
/* delete outdated alignment data */
{
struct sqlDeleter* deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
struct gbStatus* status;

/* delete seqChg, deleted, and orphans from alignments; clearing count of
 * number aligned. */
for (status = statusTbl->deleteList; status != NULL; status = status->next)
    {
    sqlDeleterAddAcc(deleter, status->acc);
    status->numAligns = 0;
    }
for (status = statusTbl->seqChgList; status != NULL; status = status->next)
    {
    sqlDeleterAddAcc(deleter, status->acc);
    status->numAligns = 0;
    }
for (status = statusTbl->orphanList; status != NULL; status = status->next)
    {
    sqlDeleterAddAcc(deleter, status->acc);
    status->numAligns = 0;
    }

gbAlignDataDeleteFromTables(db, conn, select->release->srcDb, select->type,
                            deleter, options);

sqlDeleterFree(&deleter);
} 
Пример #2
0
static void refSeqPepClean(struct sqlConnection *conn)
/* Delete all refseq peptides that are in gbSeq but no longer
 * referenced by refLink.  */
{
char query[1024];
struct sqlResult* sr;
char **row;
struct sqlDeleter* deleter;

/* don't do anything if we don't have the refLink table.  This can
 * happen if refSeq was enabled after the initial load */
if (!sqlTableExists(conn, "refLink"))
    return;

deleter = sqlDeleterNew(gTmpDir, (gbVerbose >= 4));

/* Use a join to get list of acc, which proved reasonable fastly because
 * the the list is small */
sqlSafef(query, sizeof(query), "SELECT acc FROM gbSeq LEFT JOIN refLink ON (refLink.protAcc = gbSeq.acc) "
      "WHERE (acc LIKE 'NP_%%') AND (refLink.protAcc IS NULL)");
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    sqlDeleterAddAcc(deleter, row[0]);
sqlFreeResult(&sr);

sqlDeleterDel(deleter, conn, SEQ_TBL, "acc");
sqlDeleterFree(&deleter);
}
Пример #3
0
struct sqlDeleter *gbBuildStateReloadDeleter(struct sqlConnection *conn,
                                             struct gbSelect* select,
                                             char *tmpDirPath)
/* get deleter for list of accessions to reload for the selected categories.
 * Used when reloading. Returns null if none found */
{
struct hash* seqTblAccs;
struct slName *gbStatAccs, *acc;
struct hashCookie cookie;
struct hashEl *hel;
struct sqlDeleter* deleter;
unsigned cnt = 0;
unsigned statTblSelect = select->release->srcDb | select->orgCats | select->type;

/* ESTs not implemented, which gets rid of complexities of accPrefix */
if (select->type & GB_EST)
    errAbort("gbBuildStateReloadDeleter doesn't handle ESTs");

seqTblAccs = seqTblLoadAcc(conn, select);
gbStatAccs = gbStatusTblLoadAcc(conn,  statTblSelect,  NULL);

/* build up deleter combining the two */
deleter = sqlDeleterNew(tmpDirPath, (gbVerbose >= 4));

cookie = hashFirst(seqTblAccs);
while ((hel = hashNext(&cookie)) != NULL)
    {
    sqlDeleterAddAcc(deleter, hel->name);
    cnt++;
    }

for (acc = gbStatAccs; acc != NULL; acc = acc->next)
    {
    if (hashLookup(seqTblAccs, acc->name) == NULL)
        {
        sqlDeleterAddAcc(deleter, acc->name);
        cnt++;
        }
    }

#ifdef DUMP_HASH_STATS
hashPrintStats(seqTblAccs, "seqTblAccs", stderr);
#endif
hashFree(&seqTblAccs);
slFreeList(&gbStatAccs);
if (cnt == 0)
    sqlDeleterFree(&deleter);

return deleter;
}
Пример #4
0
static void makeRepairs(struct brokenRefPepTbl *brpTbl,
                        struct sqlConnection *conn,
                        struct extFileTbl* extFileTbl,
                        boolean dryRun)
/* make repairs once data is collected */
{
static char *tmpDir = "/var/tmp";
struct hashCookie cookie;
struct hashEl *hel;
int repairCnt = 0;
int dropCnt = 0;
struct seqTbl* seqTbl = seqTblNew(conn, tmpDir, (gbVerbose > 3));
struct sqlDeleter* seqTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose > 3));

cookie = hashFirst(brpTbl->protAccHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct brokenRefPep *brp = hel->val;
    if ((brp->mrnaAcc != NULL) && (brp->newFaOff >= 0))
        {
        refPepRepairOne(conn, brp, seqTbl, extFileTbl, dryRun);
        repairCnt++;
        }
    else
        {
        refPepDropOne(conn, brp, seqTblDeleter, dryRun);
        dropCnt++;
        }
    }
if (dryRun)
    {
    gbVerbMsg(1, "%s: would have repaired %d refseq protein gbExtFile entries", 
              sqlGetDatabase(conn), repairCnt);
    gbVerbMsg(1, "%s: would have dropped %d refseq protein gbExtFile entries", 
              sqlGetDatabase(conn), dropCnt);
    }
else
    {
    seqTblCommit(seqTbl, conn);
    gbVerbMsg(1, "%s: repaired %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), repairCnt);
    sqlDeleterDel(seqTblDeleter, conn, SEQ_TBL, "acc");
    gbVerbMsg(1, "%s: dropped %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), dropCnt);
    }
}
static struct sqlDeleter*  buildIgnoredDeleters(struct sqlConnection *conn,
                                                struct gbRelease* release,
                                                boolean force, char* workDir)
/* Construct a deleter object with ignored acc that are in gbStatus.  return
 * NULL if none. */
{
struct sqlDeleter* deleter = NULL;
struct hashCookie cookie;
struct hashEl* hel;
char tmpDir[PATH_LEN];

/* Need to force load of ignore table, as release might not be initialized yet */
gbReleaseLoadIgnore(release);

safef(tmpDir, sizeof(tmpDir), "%s/ignore", workDir);

/* build delete object */
cookie = hashFirst(release->ignore->accHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct gbIgnoreAcc* igAcc;
    for (igAcc = hel->val; igAcc != NULL; igAcc = igAcc->next)
        {
        if (force || inGbStatusTable(conn, igAcc->acc, igAcc->modDate))
            {
            if (deleter == NULL)
                deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
            sqlDeleterAddAcc(deleter, igAcc->acc);
            gbVerbMsg(4, "%s %s ignored, will delete", igAcc->acc, 
                      gbFormatDate(igAcc->modDate));
            }

        }
    }
return deleter;
}
static struct sqlDeleter* buildReloadDeleter(char *reloadList, unsigned srcDb, char *tmpDir)
/* read reload list, building a deleter for the specified source DB */
{
struct sqlDeleter* deleter = NULL;
struct lineFile *lf = gzLineFileOpen(reloadList);
int cnt = 0;
char *row[1];

while (lineFileChopNext(lf, row, ArraySize(row)))
    {
    char *acc = trimSpaces(row[0]);
    if (gbGuessSrcDb(acc) == srcDb)
        {
        if (deleter == NULL)
            deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
        sqlDeleterAddAcc(deleter, acc);
        cnt++;
        gbVerbMsg(5, "%s delete for reloading", acc);
        }
    }
gzLineFileClose(&lf);
gbVerbMsg(1, "delete %d entries for reloading", cnt);
return deleter;
}
Пример #7
0
void gbMetaDataDeleteOutdated(struct sqlConnection *conn,
                              struct gbSelect* select,
                              struct gbStatusTbl* statusTbl,
                              struct dbLoadOptions* options,
                              char *tmpDir)
/* Delete outdated metadata.  Also delete genePred table entries for genes
 * where metadata changed but sequence has not.  These will have the genePred
 * records reloaded.*/
{
setGeneTblFlags(conn, options);
struct sqlDeleter* deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
struct sqlDeleter* geneTblDeleter = NULL;
struct sqlDeleter* derivedTblDeleter = NULL;
if (partitionMayHaveGeneTbls(select))
    geneTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
if (select->type == GB_MRNA)
    derivedTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
struct gbStatus* status;
gSrcDb = select->release->srcDb;
gOptions = options;
strcpy(gTmpDir, tmpDir);

/* Delete any meta modified from id tables */
for (status = statusTbl->seqChgList; status != NULL; status = status->next)
    {
    if (status->stateChg & GB_META_CHG)
        sqlDeleterAddAcc(deleter, status->acc);
    }
for (status = statusTbl->metaChgList; status != NULL; status = status->next)
    {
    assert(!(status->stateChg&GB_SEQ_CHG));
    if (status->stateChg&GB_META_CHG)
        sqlDeleterAddAcc(deleter, status->acc);
    else if (status->stateChg&GB_REBUILD_DERIVED)
        sqlDeleterAddAcc(derivedTblDeleter, status->acc);
    if (geneTblDeleter != NULL)
        {
        // need to just try, since we can set the status->isMgcFull
        // flag until we are reading the ra.
        sqlDeleterAddAcc(geneTblDeleter, status->acc);
        }
    }
for (status = statusTbl->deleteList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);
for (status = statusTbl->orphanList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);
gbMetaDataDeleteFromIdTables(conn, options, deleter);
sqlDeleterFree(&deleter);

/* remove deleted and orphans from metadata. */
deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
for (status = statusTbl->deleteList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);
for (status = statusTbl->orphanList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);

// must do gene tbls before other tables
if (geneTblDeleter != NULL)
    deleteFromGeneTbls(conn, select, geneTblDeleter);
gbMetaDataDeleteFromTables(conn, options, select->release->srcDb, deleter);

sqlDeleterFree(&deleter);
sqlDeleterFree(&geneTblDeleter);
sqlDeleterFree(&derivedTblDeleter);

/* If we are cleaning up the ext table, we need to get rid of any
 * refseq peptides in gbSeq that are no longer referenced.  We don't
 * do it other times as these are not reachable directly.
 */
if ((select->release->srcDb == GB_REFSEQ)
    && (gOptions->flags & DBLOAD_EXT_FILE_UPDATE))
    refSeqPepClean(conn);
}