Exemple #1
0
void gbAlignDataDeleteOutdated(char *db, struct sqlConnection *conn,
                               struct gbSelect* select, 
                               struct gbStatusTbl* statusTbl,
                               struct dbLoadOptions* options,
                               char *tmpDir)
/* Delete outdated alignment data.  Every accession on the status table's
 * deleteList, seqChgList and orphanList is queued on a deleter and has its
 * numAligns count reset to zero; the queued accessions are then handed to
 * gbAlignDataDeleteFromTables() for the selected srcDb and type. */
{
struct sqlDeleter *accDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
struct gbStatus *listHeads[3];
int iList;

/* lists are walked in this order: deleted, sequence-changed, orphans */
listHeads[0] = statusTbl->deleteList;
listHeads[1] = statusTbl->seqChgList;
listHeads[2] = statusTbl->orphanList;

for (iList = 0; iList < 3; iList++)
    {
    struct gbStatus *entry = listHeads[iList];
    while (entry != NULL)
        {
        sqlDeleterAddAcc(accDeleter, entry->acc);
        entry->numAligns = 0;  /* alignments for this entry are going away */
        entry = entry->next;
        }
    }

gbAlignDataDeleteFromTables(db, conn, select->release->srcDb, select->type,
                            accDeleter, options);
sqlDeleterFree(&accDeleter);
}
Exemple #2
0
struct sqlDeleter *gbBuildStateReloadDeleter(struct sqlConnection *conn,
                                             struct gbSelect* select,
                                             char *tmpDirPath)
/* get deleter for list of accessions to reload for the selected categories.
 * Used when reloading. Returns null if none found */
{
struct hash* seqTblAccs;
struct slName *gbStatAccs, *acc;
struct hashCookie cookie;
struct hashEl *hel;
struct sqlDeleter* deleter;
unsigned cnt = 0;
unsigned statTblSelect = select->release->srcDb | select->orgCats | select->type;

/* ESTs not implemented, which gets rid of complexities of accPrefix */
if (select->type & GB_EST)
    errAbort("gbBuildStateReloadDeleter doesn't handle ESTs");

seqTblAccs = seqTblLoadAcc(conn, select);
gbStatAccs = gbStatusTblLoadAcc(conn,  statTblSelect,  NULL);

/* build up deleter combining the two */
deleter = sqlDeleterNew(tmpDirPath, (gbVerbose >= 4));

cookie = hashFirst(seqTblAccs);
while ((hel = hashNext(&cookie)) != NULL)
    {
    sqlDeleterAddAcc(deleter, hel->name);
    cnt++;
    }

for (acc = gbStatAccs; acc != NULL; acc = acc->next)
    {
    if (hashLookup(seqTblAccs, acc->name) == NULL)
        {
        sqlDeleterAddAcc(deleter, acc->name);
        cnt++;
        }
    }

#ifdef DUMP_HASH_STATS
hashPrintStats(seqTblAccs, "seqTblAccs", stderr);
#endif
hashFree(&seqTblAccs);
slFreeList(&gbStatAccs);
if (cnt == 0)
    sqlDeleterFree(&deleter);

return deleter;
}
static void refSeqPepClean(struct sqlConnection *conn)
/* Delete all refseq peptides that are in gbSeq but are no longer
 * referenced by refLink. */
{
char sql[1024];
struct sqlResult *result;
char **fields;
struct sqlDeleter *pepDeleter;

/* Nothing to do without a refLink table; this can happen if refSeq
 * was enabled after the initial load. */
if (!sqlTableExists(conn, "refLink"))
    return;

pepDeleter = sqlDeleterNew(gTmpDir, (gbVerbose >= 4));

/* A join is used to collect the accessions; it proved reasonably fast
 * because the resulting list is small. */
sqlSafef(sql, sizeof(sql), "SELECT acc FROM gbSeq LEFT JOIN refLink ON (refLink.protAcc = gbSeq.acc) "
      "WHERE (acc LIKE 'NP_%%') AND (refLink.protAcc IS NULL)");
result = sqlGetResult(conn, sql);
for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    sqlDeleterAddAcc(pepDeleter, fields[0]);
sqlFreeResult(&result);

/* remove the collected accessions from the sequence table, keyed on acc */
sqlDeleterDel(pepDeleter, conn, SEQ_TBL, "acc");
sqlDeleterFree(&pepDeleter);
}
Exemple #4
0
static void refPepDropOne(struct sqlConnection *conn,
                          struct brokenRefPep *brp,
                          struct sqlDeleter* seqTblDeleter,
                          boolean dryRun)
/* Drop a refPep: report the database name and protein accession at verbose
 * level 2, then queue the accession on seqTblDeleter unless this is a
 * dry run. */
{
gbVerbPr(2, "%s\t%s\tdrop", sqlGetDatabase(conn), brp->protAcc);

/* a dry run only reports; nothing is queued for deletion */
if (dryRun)
    return;

sqlDeleterAddAcc(seqTblDeleter, brp->protAcc);
}
static struct sqlDeleter*  buildIgnoredDeleters(struct sqlConnection *conn,
                                                struct gbRelease* release,
                                                boolean force, char* workDir)
/* Construct a deleter holding the ignored accessions that are in gbStatus
 * (or all ignored accessions when force is set).  The deleter is created
 * lazily, using "<workDir>/ignore" as its tmp directory, so NULL is returned
 * when no accession qualifies. */
{
char ignoreTmpDir[PATH_LEN];
struct sqlDeleter *ignoredDeleter = NULL;
struct hashCookie iter;
struct hashEl *bucket;

/* The ignore table must be loaded explicitly, as the release might not
 * be initialized yet. */
gbReleaseLoadIgnore(release);

safef(ignoreTmpDir, sizeof(ignoreTmpDir), "%s/ignore", workDir);

/* each hash element carries a linked list of ignore entries */
iter = hashFirst(release->ignore->accHash);
for (bucket = hashNext(&iter); bucket != NULL; bucket = hashNext(&iter))
    {
    struct gbIgnoreAcc *ignored = bucket->val;
    for (; ignored != NULL; ignored = ignored->next)
        {
        /* without force, only entries present in gbStatus are deleted */
        if (!force && !inGbStatusTable(conn, ignored->acc, ignored->modDate))
            continue;

        if (ignoredDeleter == NULL)
            ignoredDeleter = sqlDeleterNew(ignoreTmpDir, (gbVerbose >= 4));
        sqlDeleterAddAcc(ignoredDeleter, ignored->acc);
        gbVerbMsg(4, "%s %s ignored, will delete", ignored->acc, 
                  gbFormatDate(ignored->modDate));
        }
    }
return ignoredDeleter;
}
static struct sqlDeleter* buildReloadDeleter(char *reloadList, unsigned srcDb, char *tmpDir)
/* Read the reload list file and build a deleter with the accessions that
 * gbGuessSrcDb() attributes to srcDb.  The deleter is created on the first
 * matching accession, so NULL is returned when none match. */
{
struct lineFile *reloadFile = gzLineFileOpen(reloadList);
struct sqlDeleter *reloadDeleter = NULL;
char *words[1];
int numQueued = 0;

while (lineFileChopNext(reloadFile, words, ArraySize(words)))
    {
    char *acc = trimSpaces(words[0]);

    /* skip accessions belonging to a different source database */
    if (gbGuessSrcDb(acc) != srcDb)
        continue;

    if (reloadDeleter == NULL)
        reloadDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
    sqlDeleterAddAcc(reloadDeleter, acc);
    numQueued++;
    gbVerbMsg(5, "%s delete for reloading", acc);
    }
gzLineFileClose(&reloadFile);

gbVerbMsg(1, "delete %d entries for reloading", numQueued);
return reloadDeleter;
}
void gbMetaDataDeleteOutdated(struct sqlConnection *conn,
                              struct gbSelect* select,
                              struct gbStatusTbl* statusTbl,
                              struct dbLoadOptions* options,
                              char *tmpDir)
/* Delete outdated metadata.  Also delete genePred table entries for genes
 * where metadata changed but sequence has not.  These will have the genePred
 * records reloaded.
 *
 * Works in two passes over the status table lists:
 *  1. accessions with changed metadata, plus deleted and orphaned ones, are
 *     removed from the id tables;
 *  2. deleted and orphaned accessions are removed from the metadata tables,
 *     after the gene tables have been handled.
 * As a side effect the file-level gSrcDb, gOptions and gTmpDir are set from
 * the arguments. */
{
setGeneTblFlags(conn, options);
struct sqlDeleter* deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
struct sqlDeleter* geneTblDeleter = NULL;
struct sqlDeleter* derivedTblDeleter = NULL;
/* the two optional deleters only exist for partitions that need them */
if (partitionMayHaveGeneTbls(select))
    geneTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
if (select->type == GB_MRNA)
    derivedTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
struct gbStatus* status;
gSrcDb = select->release->srcDb;
gOptions = options;
/* NOTE(review): unbounded copy; assumes gTmpDir is large enough to hold
 * tmpDir -- confirm against its declaration. */
strcpy(gTmpDir, tmpDir);

/* Delete any meta modified from id tables */
for (status = statusTbl->seqChgList; status != NULL; status = status->next)
    {
    if (status->stateChg & GB_META_CHG)
        sqlDeleterAddAcc(deleter, status->acc);
    }
for (status = statusTbl->metaChgList; status != NULL; status = status->next)
    {
    assert(!(status->stateChg&GB_SEQ_CHG));
    if (status->stateChg&GB_META_CHG)
        sqlDeleterAddAcc(deleter, status->acc);
    else if ((status->stateChg&GB_REBUILD_DERIVED) && (derivedTblDeleter != NULL))
        {
        // derivedTblDeleter is only allocated for mRNA partitions, so guard
        // against passing NULL to sqlDeleterAddAcc for other types.
        sqlDeleterAddAcc(derivedTblDeleter, status->acc);
        }
    if (geneTblDeleter != NULL)
        {
        // need to just try, since we can't set the status->isMgcFull
        // flag until we are reading the ra.
        sqlDeleterAddAcc(geneTblDeleter, status->acc);
        }
    }
for (status = statusTbl->deleteList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);
for (status = statusTbl->orphanList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);
gbMetaDataDeleteFromIdTables(conn, options, deleter);
sqlDeleterFree(&deleter);

/* remove deleted and orphans from metadata. */
deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
for (status = statusTbl->deleteList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);
for (status = statusTbl->orphanList; status != NULL; status = status->next)
    sqlDeleterAddAcc(deleter, status->acc);

// must do gene tbls before other tables
if (geneTblDeleter != NULL)
    deleteFromGeneTbls(conn, select, geneTblDeleter);
gbMetaDataDeleteFromTables(conn, options, select->release->srcDb, deleter);

/* NOTE(review): derivedTblDeleter accumulates accessions above but is only
 * freed here, never applied to a table in this function -- confirm that is
 * intended. */
sqlDeleterFree(&deleter);
sqlDeleterFree(&geneTblDeleter);
sqlDeleterFree(&derivedTblDeleter);

/* If we are cleaning up the ext table, we need to get rid of any
 * refseq peptides in gbSeq that are no longer referenced.  We don't
 * do it other times as these are not reachable directly.
 */
if ((select->release->srcDb == GB_REFSEQ)
    && (gOptions->flags & DBLOAD_EXT_FILE_UPDATE))
    refSeqPepClean(conn);
}