Exemple #1
0
static bool checkForAccTypeChange(struct sqlConnection *conn, 
                                  struct gbSelect* select,
                                  struct gbStatus* status)
/* Look up status->acc in the gbSeq table.  If a row is found, an error is
 * written to stderr (either the stored type differs from status->type, or
 * the accession is unexpectedly present with the same type) and gErrorCnt
 * is incremented.
 * Returns true if a row was found (error reported), false otherwise.
 * The select argument is not used by this function.
 */
{
char query[128];
struct sqlResult* result;
char **fields = NULL;
bool found = FALSE;

sqlSafef(query, sizeof(query),
      "SELECT type FROM gbSeq WHERE acc = '%s'", status->acc);
result = sqlGetResult(conn, query);

/* only fetch a row when the query produced a result object */
if (result != NULL)
    fields = sqlNextRow(result);

if (fields != NULL)
    {
    unsigned seqType = gbParseType(fields[0]);
    if (seqType == status->type)
        {
        fprintf(stderr,
                "Error: %s %s is in the seq table, but shouldn't be, don't know why\n",
                status->acc, gbFormatDate(status->modDate));
        }
    else
        {
        fprintf(stderr,
                "Error: %s %s type has changed from %s to %s; add to ignore file\n",
                status->acc, gbFormatDate(status->modDate),
                gbFmtSelect(seqType), gbFmtSelect(status->type));
        }
    gErrorCnt++;
    found = TRUE;
    }
sqlFreeResult(&result);
return found;
}
Exemple #2
0
static void traceSelect(char* which, struct gbStatus *status)
/* Emit a one-line verbose trace (level 5) describing a selected entry:
 * the label in which, accession.version, modDate and gbSeqId, followed by
 * the selected processed and aligned updates when they are set. */
{
enum { traceLevel = 5 };

gbVerbPrStart(traceLevel, "%s: %s.%d %s id=%u", which, status->acc,
              status->version, gbFormatDate(status->modDate),
              status->gbSeqId);
if (status->selectProc != NULL)
    {
    gbVerbPrMore(traceLevel, ", proc=%s/%s",
                 status->selectProc->update->name,
                 gbFormatDate(status->selectProc->modDate));
    }
if (status->selectAlign != NULL)
    {
    gbVerbPrMore(traceLevel, ", aln=%s/%d",
                 status->selectAlign->update->name,
                 status->selectAlign->version);
    }
gbVerbPrMore(traceLevel, "\n");
}
static void chkGbStatusGbEntry(struct gbSelect* select, struct gbEntry* entry,
                               struct metaData* md)
/* Cross-check a gbIndex entry against the gbStatus fields held in md.
 * Reports (via gbError) a missing aligned object for the gbStatus version,
 * a missing processed record with matching modDate and version, and a
 * mismatch in the number of alignments.  select is not used here. */
{
struct gbProcessed* proc;
struct gbAligned* aligned = gbEntryFindAlignedVer(entry, md->gbsVersion);

/* without an aligned object for this version nothing else can be compared */
if (aligned == NULL)
    {
    gbError("%s.%d: no genbank gbIndex aligned object for gbStatus",
            md->acc, md->gbsVersion);
    return;
    }

/* walk the processed list looking for the same modDate and version */
for (proc = entry->processed; proc != NULL; proc = proc->next)
    {
    if ((proc->modDate == md->gbsModDate)
        && (proc->version == md->gbsVersion))
        break;
    }
if (proc == NULL)
    gbError("%s: no gbIndex processed entry for version %d, moddate %s, update %s",
            md->acc, md->gbsVersion, gbFormatDate(md->gbsModDate),
            aligned->update->name);

if (aligned->numAligns != md->gbsNumAligns)
    gbError("%s.%d: genbank index number of alignments (%d) does not match gbStatus (%d)",
            md->acc, md->gbsVersion, aligned->numAligns, md->gbsNumAligns);
}
Exemple #4
0
static void checkNewEntry(struct gbSelect* select, struct gbStatusTbl* statusTbl,
                          struct gbEntry* entry)
/* Handle an entry whose selectVer is still NULL_VERSION (i.e. one that was
 * not matched so far).  Depending on what getProcAligned() yields, the entry
 * is either traced as non-coding, added to statusTbl and marked new, or
 * traced as not aligned. */
{
struct gbAligned* aligned = NULL;
struct gbProcessed* processed;

if (entry->selectVer != NULL_VERSION)
    return;  /* already selected, nothing to do */

/* Fetch the processed/aligned pair.  If the processed directory has not
 * been aligned yet the aligned object may be absent, in which case the
 * entry is skipped (with an optional trace message). */
processed = getProcAligned(entry, &aligned);

if (!loadNonCoding && (processed != NULL) && (processed->molType != mol_mRNA))
    {
    gbVerbPr(5, "nonCoding: %s.%d %s", entry->acc, entry->processed->version, gbMolTypeSym(processed->molType));
    }
else if (aligned != NULL)
    {
    struct gbStatus* newStatus
        = gbStatusTblAdd(statusTbl, entry->acc,
                         aligned->version, processed->modDate,
                         entry->type, select->release->srcDb,
                         entry->orgCat, 0, 0,
                         aligned->update->release->version,
                         aligned->update->shortName, 0);
    markNew(statusTbl, newStatus, processed, aligned);
    }
else if (gbVerbose >= 5)
    {
    gbVerbPr(5, "notAligned: %s.%d %s", entry->acc, entry->processed->version,
             gbFormatDate(entry->processed->modDate));
    }
}
static boolean inGbStatusTable(struct sqlConnection *conn, char* acc,
                               time_t modDate)
/* Check if the specified accession, with the given modification date, is in
 * the gbStatus table.  Returns TRUE when at least one matching row exists.
 * The query is built with sqlSafef(), as other queries here are, so that
 * the accession string is SQL-escaped rather than pasted in verbatim. */
{
char query[512];
sqlSafef(query, sizeof(query),
      "SELECT count(*) FROM gbStatus WHERE (acc='%s') AND (modDate='%s')",
      acc, gbFormatDate(modDate));
return (sqlQuickNum(conn, query) > 0);
}
void checkProcOrgCat(struct gbEntry* entry, struct gbProcessed* proc0, char *org0,
                     struct gbProcessed* proc, struct slTime** reported)
/* Compare the preferred organism name of proc against org0 (the name
 * associated with proc0).  When they differ and no error has been recorded
 * yet for proc's modDate, report the change via gbError and remember the
 * modDate in *reported so it is not reported again. */
{
/* Names are compared by pointer; the original author notes they live in a
 * static table and that NULL is returned for unknown organisms, so a
 * NULL <-> non-NULL transition also counts as a change. */
char* procOrg = gbGenomePreferedOrgName(proc->organism);

if (procOrg == org0)
    return;  /* same organism */
if (slTimeHave(*reported, proc->modDate))
    return;  /* already reported for this date */

gbError("%s\t%s\t%s\t%s changes organism \"%s\" to \"%s\"",
        entry->acc, 
        gbFormatDate(proc->modDate),
        gbSrcDbName(entry->processed->update->release->srcDb),
        gbFormatDate(proc0->modDate),
        proc->organism,
        proc0->organism);
slSafeAddHead(reported, slTimeNew(proc->modDate));
}
void checkEst(struct gbRelease* mrnaRelease,
              struct gbEntry* entry,
              struct gbSelect* prevSelect)
/* Check an EST, check for type change and orgCat change for
 * any of genomes in use */
{
struct gbEntry* mrnaEntry = gbReleaseFindEntry(mrnaRelease, entry->acc);
if (mrnaEntry != NULL)
    {
    /* type changed, output in format for ignore.idx */
    if (mrnaEntry->processed->modDate > entry->processed->modDate)
        gbError("%s\t%s\t%s\t%s changes type EST to mRNA",
                mrnaEntry->acc, gbFormatDate(entry->processed->modDate),
                gbSrcDbName(mrnaRelease->srcDb),
                gbFormatDate(mrnaEntry->processed->modDate));
    else
        gbError("%s\t%s\t%s\t%s changes type mRNA to EST",
                mrnaEntry->acc, gbFormatDate(mrnaEntry->processed->modDate),
                gbSrcDbName(mrnaRelease->srcDb),
                gbFormatDate(entry->processed->modDate));
    }
checkOrgCat(entry, prevSelect);
}
void gbEntryDump(struct gbEntry* entry, FILE* out, int indent)
/* Write a text dump of a gbEntry to out.  The first line, indented by
 * indent spaces, gives the accession and type; it is followed by one "pr:"
 * line per processed record and one "al:" line per aligned record, each
 * indented two further spaces. */
{
int subIndent = indent + 2;
char* typeName = (entry->type == GB_MRNA) ? "mRNA" : "EST";
struct gbProcessed* proc = entry->processed;
struct gbAligned* aln = entry->aligned;

fprintf(out, "%*s%s: %s\n", indent, "", entry->acc, typeName);

while (proc != NULL)
    {
    fprintf(out, "%*spr: %s: %d %s \"%s\"\n", subIndent, "",
            proc->update->name, proc->version,
            gbFormatDate(proc->modDate), proc->organism);
    proc = proc->next;
    }

while (aln != NULL)
    {
    fprintf(out, "%*sal: %s: %d\n", subIndent, "",
            aln->update->name, aln->version);
    aln = aln->next;
    }
}
static struct sqlDeleter*  buildIgnoredDeleters(struct sqlConnection *conn,
                                                struct gbRelease* release,
                                                boolean force, char* workDir)
/* Build a deleter holding every ignored accession of the release that is
 * either present in gbStatus (same acc and modDate) or, when force is set,
 * all of them.  The deleter is created lazily with "<workDir>/ignore" as
 * its directory argument.  Returns NULL if no accession qualified. */
{
char tmpDir[PATH_LEN];
struct hashCookie cookie;
struct hashEl* hel;
struct sqlDeleter* deleter = NULL;

/* The release might not be initialized yet, so force the ignore table
 * to be loaded before it is scanned. */
gbReleaseLoadIgnore(release);

safef(tmpDir, sizeof(tmpDir), "%s/ignore", workDir);

/* each hash element holds a linked list of ignore records */
cookie = hashFirst(release->ignore->accHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct gbIgnoreAcc* igAcc = hel->val;
    for (; igAcc != NULL; igAcc = igAcc->next)
        {
        if (!force && !inGbStatusTable(conn, igAcc->acc, igAcc->modDate))
            continue;  /* not in the database, nothing to delete */

        if (deleter == NULL)
            deleter = sqlDeleterNew(tmpDir, (gbVerbose >= 4));
        sqlDeleterAddAcc(deleter, igAcc->acc);
        gbVerbMsg(4, "%s %s ignored, will delete", igAcc->acc, 
                  gbFormatDate(igAcc->modDate));
        }
    }
return deleter;
}
static void gbCdnaInfoUpdate(struct gbStatus* status, struct sqlConnection *conn)
/* Queue an update of the cDNA info table (through the gbCdnaInfoUpd updater)
 * for the current entry.  A GB_NEW state change adds a complete
 * tab-separated row; otherwise a GB_META_CHG state change modifies the
 * existing row whose id equals status->gbSeqId.  Any other state does
 * nothing.  Field values come from the file-level ra* variables and
 * raFieldCurId() lookups, which are defined outside this function.
 * The conn argument is not used in this function.
 * NOTE(review): haveMol and haveGi presumably indicate whether the table
 * has the mol and gi columns -- confirm where they are set. */
{
if (status->stateChg & GB_NEW)
    {
    /* New entry: column order is id, acc, version, moddate, type,
     * direction, then the 14 raFieldCurId() ids, optionally followed by gi
     * and mol.  The three variants differ only in those trailing columns. */
    if (haveMol)
        sqlUpdaterAddRow(gbCdnaInfoUpd, "%u\t%s\t%u\t%s\t%s\t%c\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%s",
                         status->gbSeqId, raAcc, raVersion, gbFormatDate(raModDate),
                         ((status->type == GB_MRNA) ? "mRNA" : "EST"), raDir,
                         raFieldCurId("src"), raFieldCurId("org"),
                         raFieldCurId("lib"), raFieldCurId("clo"),
                         raFieldCurId("sex"), raFieldCurId("tis"),
                         raFieldCurId("dev"), raFieldCurId("cel"),
                         raFieldCurId("cds"), raFieldCurId("key"),
                         raFieldCurId("def"), raFieldCurId("gen"),
                         raFieldCurId("pro"), raFieldCurId("aut"),
                         raGi, raMol);
    else if (haveGi)
        sqlUpdaterAddRow(gbCdnaInfoUpd, "%u\t%s\t%u\t%s\t%s\t%c\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u",
                         status->gbSeqId, raAcc, raVersion, gbFormatDate(raModDate),
                         ((status->type == GB_MRNA) ? "mRNA" : "EST"), raDir,
                         raFieldCurId("src"), raFieldCurId("org"),
                         raFieldCurId("lib"), raFieldCurId("clo"),
                         raFieldCurId("sex"), raFieldCurId("tis"),
                         raFieldCurId("dev"), raFieldCurId("cel"),
                         raFieldCurId("cds"), raFieldCurId("key"),
                         raFieldCurId("def"), raFieldCurId("gen"),
                         raFieldCurId("pro"), raFieldCurId("aut"),
                         raGi);
    else
        sqlUpdaterAddRow(gbCdnaInfoUpd, "%u\t%s\t%u\t%s\t%s\t%c\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u\t%u",
                         status->gbSeqId, raAcc, raVersion, gbFormatDate(raModDate),
                         ((status->type == GB_MRNA) ? "mRNA" : "EST"), raDir,
                         raFieldCurId("src"), raFieldCurId("org"),
                         raFieldCurId("lib"), raFieldCurId("clo"),
                         raFieldCurId("sex"), raFieldCurId("tis"),
                         raFieldCurId("dev"), raFieldCurId("cel"),
                         raFieldCurId("cds"), raFieldCurId("key"),
                         raFieldCurId("def"), raFieldCurId("gen"),
                         raFieldCurId("pro"), raFieldCurId("aut"));
    }
else if (status->stateChg & GB_META_CHG)
    {
    /* Metadata changed: rewrite the version, moddate, direction and id
     * columns (plus gi/mol when present) of the row selected by
     * "WHERE id=<status->gbSeqId>".  acc and type are not touched. */
    if (haveMol)
        sqlUpdaterModRow(gbCdnaInfoUpd, 1, "version='%u', moddate='%s', direction='%c', "
                         "source=%u, organism=%u, library=%u, mrnaClone=%u, sex=%u, "
                         "tissue=%u, development=%u, cell=%u, cds=%u, keyword=%u, "
                         "description=%u, geneName=%u, productName=%u, author=%u, gi=%u, mol='%s' "
                         "WHERE id=%u",
                         raVersion, gbFormatDate(raModDate), raDir,
                         raFieldCurId("src"), raFieldCurId("org"),
                         raFieldCurId("lib"), raFieldCurId("clo"),
                         raFieldCurId("sex"), raFieldCurId("tis"),
                         raFieldCurId("dev"), raFieldCurId("cel"),
                         raFieldCurId("cds"), raFieldCurId("key"),
                         raFieldCurId("def"), raFieldCurId("gen"),
                         raFieldCurId("pro"), raFieldCurId("aut"),
                         raGi, raMol, status->gbSeqId);
    else if (haveGi)
        sqlUpdaterModRow(gbCdnaInfoUpd, 1, "version='%u', moddate='%s', direction='%c', "
                         "source=%u, organism=%u, library=%u, mrnaClone=%u, sex=%u, "
                         "tissue=%u, development=%u, cell=%u, cds=%u, keyword=%u, "
                         "description=%u, geneName=%u, productName=%u, author=%u, gi=%u "
                         "WHERE id=%u",
                         raVersion, gbFormatDate(raModDate), raDir,
                         raFieldCurId("src"), raFieldCurId("org"),
                         raFieldCurId("lib"), raFieldCurId("clo"),
                         raFieldCurId("sex"), raFieldCurId("tis"),
                         raFieldCurId("dev"), raFieldCurId("cel"),
                         raFieldCurId("cds"), raFieldCurId("key"),
                         raFieldCurId("def"), raFieldCurId("gen"),
                         raFieldCurId("pro"), raFieldCurId("aut"),
                         raGi, status->gbSeqId);
    else
        sqlUpdaterModRow(gbCdnaInfoUpd, 1, "version='%u', moddate='%s', direction='%c', "
                         "source=%u, organism=%u, library=%u, mrnaClone=%u, sex=%u, "
                         "tissue=%u, development=%u, cell=%u, cds=%u, keyword=%u, "
                         "description=%u, geneName=%u, productName=%u, author=%u "
                         "WHERE id=%u",
                         raVersion, gbFormatDate(raModDate), raDir,
                         raFieldCurId("src"), raFieldCurId("org"),
                         raFieldCurId("lib"), raFieldCurId("clo"),
                         raFieldCurId("sex"), raFieldCurId("tis"),
                         raFieldCurId("dev"), raFieldCurId("cel"),
                         raFieldCurId("cds"), raFieldCurId("key"),
                         raFieldCurId("def"), raFieldCurId("gen"),
                         raFieldCurId("pro"), raFieldCurId("aut"),
                         status->gbSeqId);
    }
}
static void loadGbStatusRow(struct metaDataTbls* metaDataTbls,
                            struct sqlConnection* conn, char** row,
                            unsigned descOrgCats)
/* Load one row of the gbStatus table into the metaData record for its
 * accession, reporting duplicate accessions and unparsable dates.  If the
 * accession was already seen in gbCdnaInfo, the two tables are checked
 * against each other (id, type, srcDb, version, moddate) and the presence
 * of a description is checked against descOrgCats.  conn is not used here. */
{
/* positions of the columns in row, in the order they are parsed:
 * acc, version, modDate, type, srcDb, orgCat, gbSeq, numAligns */
enum
    {
    gbsColAcc, gbsColVersion, gbsColModDate, gbsColType,
    gbsColSrcDb, gbsColOrgCat, gbsColGbSeq, gbsColNumAligns
    };
struct metaData* md = metaDataTblsGet(metaDataTbls, row[gbsColAcc]);
boolean dateOk = TRUE;
boolean wantDesc;
HGID seqId;

if (md->inGbStatus)
    gbError("%s: occurs multiple times in the gbStatus table", md->acc);
md->inGbStatus = TRUE;

md->gbsVersion = strToUnsigned(row[gbsColVersion], md->acc,
                               "gbStatus.version", NULL);

md->gbsModDate = gbParseChkDate(row[gbsColModDate], &dateOk);
if (!dateOk)
    gbError("%s: invalid gbStatus.moddate value: \"%s\"", md->acc,
            row[gbsColModDate]);

md->gbsType = gbParseType(row[gbsColType]);
md->gbsSrcDb = gbParseSrcDb(row[gbsColSrcDb]);
md->gbsOrgCat = gbParseOrgCat(row[gbsColOrgCat]);
seqId = strToUnsigned(row[gbsColGbSeq], md->acc, "gbStatus.gbSeq", NULL);
md->gbsNumAligns = strToUnsigned(row[gbsColNumAligns], md->acc,
                                 "gbStatus.numAligns", NULL);

md->typeFlags |= md->gbsType;

/* the remaining checks only apply when a gbCdnaInfo row was seen */
if (!md->inGbCdnaInfo)
    return;

if (seqId != md->gbCdnaInfoId)
    gbError("%s: gbStatus.gbSeq (%d) not same gbCdnaInfo.id (%d)", md->acc, seqId,
            md->gbCdnaInfoId);
if (md->gbsType != md->gbCdnaInfoType)
    gbError("%s: gbStatus.type (%s) not same as gbCdnaInfo.type (%s)", md->acc,
            gbFmtSelect(md->gbsType), gbFmtSelect(md->gbCdnaInfoType));
if (md->gbsSrcDb != (md->typeFlags & GB_SRC_DB_MASK))
    gbError("%s: gbStatus.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", md->acc,
            gbFmtSelect(md->gbsSrcDb), gbFmtSelect(md->typeFlags));
if (md->gbsVersion != md->gbCdnaInfoVersion)
    gbError("%s: gbStatus.version (%d) not same gbCdnaInfo.version (%d)", md->acc,
            md->gbsVersion, md->gbCdnaInfoVersion);
if (md->gbsModDate != md->gbCdnaInfoModdate)
    gbError("%s: gbStatus.modDate (%s) not same gbCdnaInfo.moddate (%s)", md->acc,
            gbFormatDate(md->gbsModDate), gbFormatDate(md->gbCdnaInfoModdate));

/* a description must exist exactly when the organism category is one of
 * those selected by descOrgCats */
wantDesc = ((descOrgCats & md->gbsOrgCat) != 0);
if (wantDesc && !md->haveDesc)
    gbError("%s: should have gbCdnaInfo.description: %s", md->acc,
            gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb));
else if (!wantDesc && md->haveDesc)
    gbError("%s: should not have gbCdnaInfo.description: %s", md->acc,
            gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb));
}
Exemple #12
0
static void selectStatus(struct gbStatusTbl* statusTbl,
                         struct gbStatus* tmpStatus,
                         void* clientData)
/* Function called to determine if a status entry should be loaded.  This
 * compares the status parsed from the gbStatus file with the gbIndex.
 * Unchanged entries are not loaded into the table, decresing memory required
 * for incremental loads.
 */
{
struct selectStatusData* ssData = clientData;
struct gbEntry* entry = gbReleaseFindEntry(ssData->select->release,
                                           tmpStatus->acc);
struct gbProcessed* processed = NULL;
struct gbAligned* aligned = NULL;
struct hashEl* seqAccEl = hashLookup(ssData->seqHash, tmpStatus->acc);

/* check if in seq table, record if found */
if (seqAccEl == NULL)
    {
    fprintf(stderr, "Error: %s is in gbStatus but not in gbSeq table\n",
            tmpStatus->acc);
    gErrorCnt++;
    }
else
    seqAccEl->val = (void*)TRUE;

if (entry != NULL)
    processed = getProcAligned(entry, &aligned);
/* if no entry or not aligned, or if it shouldn't be included, delete */
if ((entry == NULL) || (aligned == NULL))
    markDeleted(statusTbl, tmpStatus, ssData);
else if (!loadNonCoding && (processed->molType != mol_mRNA))
    markIgnore(statusTbl, tmpStatus, entry);
else
    {
    /* validate entries are not going backwards */
    if (aligned->version < tmpStatus->version)
        errAbort("version for %s in release (%d) is less than one in database (%d)",
                 entry->acc, aligned->version, tmpStatus->version);
    if (processed->modDate < tmpStatus->modDate)
        {
        fprintf(stderr, "Warning: modDate for %s in release (%s) is before one in database (%s)\n",
                entry->acc, gbFormatDate(processed->modDate),
                gbFormatDate(tmpStatus->modDate));
        }
    /* flag updates for changed for latter processing, order of checks is
     * very important.*/
    if ((aligned->version > tmpStatus->version)
        || (aligned->numAligns != tmpStatus->numAligns))
        markSeqChanged(statusTbl, tmpStatus, processed, aligned);
    else if (processed->modDate != tmpStatus->modDate)
        markMetaChanged(ssData->select, statusTbl, tmpStatus, processed,
                        aligned);
    else if (statusTbl->extFileUpdate
             && !sameString(tmpStatus->extRelease,
                            ssData->select->release->version))
        markExtChanged(statusTbl, tmpStatus, processed, aligned);
    else if ((gOptions->flags & DBLOAD_REBUILD_DERIVED)
             && (entry->type == GB_MRNA))
        markRebuildDerived(statusTbl, tmpStatus, processed, aligned);
    else 
        markNoChange(statusTbl, tmpStatus, entry);
    }
}