Ejemplo n.º 1
0
struct gbRelease* loadIndex(char* relName, unsigned types, char* database)
/* load processed section of index for release */
{
struct gbIndex* index = gbIndexNew(database, NULL);
struct gbSelect select;
ZeroVar(&select);
select.release = gbIndexMustFindRelease(index, relName);

if (types & GB_MRNA)
    {
    select.type = GB_MRNA;
    gbReleaseLoadProcessed(&select);
    }
if ((types & GB_EST) && (select.release->srcDb == GB_GENBANK))
    {
    struct slName* prefixes, *prefix;
    select.type = GB_EST;
    prefixes = gbReleaseGetAccPrefixes(select.release, GB_PROCESSED, GB_EST);
    for (prefix = prefixes; prefix != NULL; prefix = prefix->next)
        {
        select.accPrefix = prefix->name;
        gbReleaseLoadProcessed(&select);
        }
    select.accPrefix = NULL;
    slFreeList(&prefixes);
    }

return select.release;
}
int main(int argc, char* argv[])
{
char *relName, *updateName, *typeAccPrefix, *database, *sep;
struct gbIndex* index;
struct gbSelect select;
struct gbSelect* prevSelect = NULL;
struct gbAlignInfo alignInfo;
boolean noMigrate;
ZeroVar(&select);

optionInit(&argc, argv, optionSpecs);
if (argc != 5)
    usage();
maxFaSize = optionInt("fasize", -1);
workDir = optionVal("workdir", "work/align");
noMigrate = optionExists("noMigrate");
createPolyASizes = optionExists("polyASizes");
gbVerbInit(optionInt("verbose", 0));
relName = argv[1];
updateName = argv[2];
typeAccPrefix = argv[3];
database = argv[4];

/* parse typeAccPrefix */
sep = strchr(typeAccPrefix, '.');
if (sep != NULL)
    *sep = '\0';
select.type = gbParseType(typeAccPrefix);
if (sep != NULL)
    {
    select.accPrefix = sep+1;
    *sep = '.';
    }
select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno"));

index = gbIndexNew(database, NULL);
select.release = gbIndexMustFindRelease(index, relName);
select.update = gbReleaseMustFindUpdate(select.release, updateName);
gbVerbMsg(0, "gbAlignGet: %s/%s/%s/%s", select.release->name,
          select.release->genome->database, select.update->name,
          typeAccPrefix);

/* Get the release to migrate, if applicable */
if (!noMigrate)
    prevSelect = gbAlignGetMigrateRel(&select);

alignInfo = gbAlignGet(&select, prevSelect);

/* always print stats */
fprintf(stderr, "gbAlignGet: %s/%s/%s/%s: align=%d, migrate=%d\n",
        select.release->name, select.release->genome->database,
        select.update->name, typeAccPrefix,
        alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt);
gbIndexFree(&index);

/* print alignment and migrate count, which is read by the driver program */
printf("alignCnt: %d %d\n", alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt);
return 0;
}
Ejemplo n.º 3
0
int main(int argc, char* argv[])
{
char *relName, *updateName, *typeAccPrefix, *database, *sep;
struct gbIndex* index;
struct gbSelect select;
struct gbSelect* prevSelect = NULL;
boolean noMigrate;
ZeroVar(&select);

optionInit(&argc, argv, optionSpecs);
if (argc != 5)
    usage();
gWorkDir = optionVal("workdir", "work/align");
gSortTmp = optionVal("sortTmp", NULL);
noMigrate = optionExists("noMigrate");
gbVerbInit(optionInt("verbose", 0));
relName = argv[1];
updateName = argv[2];
typeAccPrefix = argv[3];
database = argv[4];

/* parse typeAccPrefix */
sep = strchr(typeAccPrefix, '.');
if (sep != NULL)
    *sep = '\0';
select.type = gbParseType(typeAccPrefix);
if (sep != NULL)
    {
    select.accPrefix = sep+1;
    *sep = '.';
    }

index = gbIndexNew(database, NULL);
select.release = gbIndexMustFindRelease(index, relName);
select.update = gbReleaseMustFindUpdate(select.release, updateName);
select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno"));

gbVerbMsg(0, "gbAlignInstall: %s/%s/%s/%s", select.release->name,
          select.release->genome->database, select.update->name,
          typeAccPrefix);

/* Get the release to migrate, if applicable */
if (!noMigrate)
    prevSelect = gbAlignGetMigrateRel(&select);

gbAlignInstall(&select, prevSelect);

/* must go to stderr to be logged */
gbVerbMsg(0, "gbAlignInstall: complete");
    
gbIndexFree(&index);
return 0;
}
void gbProcessedCheck()
/* Do processed sanity checks on genbank/refseq releases.  A gbIndex is built
 * from gGbRoot, the processed mRNA partitions (native and xeno) of both
 * GenBank and RefSeq are obtained with a NULL release argument, and
 * checkMrnaPartition is run on each one.  The partition list and the index
 * are both released before returning. */
{
struct gbIndex* index = gbIndexNew(NULL, gGbRoot);
struct gbSelect* partitions, *select;

/* get mRNA data only, ESTs are compared against mRNAs for type change */
partitions = gbIndexGetPartitions(index, GB_PROCESSED, GB_GENBANK|GB_REFSEQ, 
                                  NULL, GB_MRNA, GB_NATIVE|GB_XENO, NULL);
for (select = partitions; select != NULL; select = select->next)
    checkMrnaPartition(select);

/* free the partition list before the index it was obtained from */
slFreeList(&partitions);
gbIndexFree(&index);
}
Ejemplo n.º 5
0
int main(int argc, char* argv[])
{
int argi;
char* dumpFile = NULL;
unsigned flags = 0;
char* database, *accPrefix;
struct gbIndex* index;
struct stepInfo runInfo;

gbVerbInit(0);
optionInit(&argc, argv, optionSpecs);
if (argc < 2)
    usage();
if (optionExists("processed"))
    flags |= DO_PROCESSED;
if (optionExists("aligned"))
    flags |= DO_PROCESSED|DO_ALIGNED;
if (optionExists("mrna"))
    flags |= DO_MRNA;
if (optionExists("est"))
    flags |= DO_EST;

dumpFile = optionVal("dump", NULL);
database = optionVal("db", NULL);
accPrefix = optionVal("accPrefix", NULL);

if ((flags & DO_ALIGNED) && (database == NULL))
    errAbort("must specify -db with -aligned");
if (!(flags & (DO_MRNA|DO_EST)))
    errAbort("must specify at least one of -mrna or -est");
if (!(flags & (DO_ALIGNED|DO_PROCESSED)))
    errAbort("must specify at least one of -processed or -aligned");
            
index = gbIndexNew(database, NULL);
runInfo = beginStep(index, NULL, "loading index files");

for (argi = 1; argi < argc; argi++)
    testRelLoad(index, gbIndexMustFindRelease(index, argv[argi]),
                database, flags, accPrefix);

if (dumpFile != NULL)
    {
    FILE* dumpOut = mustOpen(dumpFile, "w");
    gbIndexDump(index, dumpOut);
    if (fclose(dumpOut) != 0)
        errnoAbort("close of dumpfile");
    }
endStep(index, &runInfo);
gbIndexFree(&index);
return 0;
}
Ejemplo n.º 6
0
static void gbSanity(char* database)
/* Run the sanity checks for database.  When gOptions.relRestrict is set,
 * only that release is checked; otherwise the release returned by
 * newestReleaseWithAligns is checked for GenBank and for RefSeq, and it is
 * an error if neither exists.  Aborts if no alignment set was checked.
 * Finishes with checkGbCdnaInfoStrKeys on a connection to the database. */
{
struct gbIndex* gbIdx = gbIndexNew(database, NULL);
struct sqlConnection *dbConn;
struct gbRelease* rel;
gbErrorSetDb(database);
int setsChecked = 0;

if (gOptions.relRestrict != NULL)
    {
    /* restricted to a single, explicitly named release */
    rel = gbIndexMustFindRelease(gbIdx, gOptions.relRestrict);
    setsChecked += releaseSanity(rel, database);
    }
else
    {
    /* Check each partition of the genbank/refseq using the newest aligned
     * release */
    int relsFound = 0;

    rel = newestReleaseWithAligns(gbIdx, database, GB_GENBANK);
    if (rel != NULL)
        {
        setsChecked += releaseSanity(rel, database);
        relsFound++;
        }

    rel = newestReleaseWithAligns(gbIdx, database, GB_REFSEQ);
    if (rel != NULL)
        {
        setsChecked += releaseSanity(rel, database);
        relsFound++;
        }

    if (relsFound == 0)
        errAbort("Error: No RefSeq or Genbank alignments for %s\n"
                 "have updates been enabled?", database);
    }

if (setsChecked == 0)
    errAbort("Error: no alignment data was checked");
verbose(1, "%d alignment sets checked\n", setsChecked);

gbIndexFree(&gbIdx);

/* check of uniqueStr ids */
dbConn = hAllocConn(database);
checkGbCdnaInfoStrKeys(dbConn);
hFreeConn(&dbConn);
}
void checkEstPartitions(struct gbRelease* mrnaRelease)
/* Check the processed GenBank EST partitions (native and xeno) of the
 * release with the same name as mrnaRelease.  Each partition is passed to
 * checkEstPartition together with mrnaRelease, so ESTs are compared against
 * mRNA entries for changed type.  A separate gbIndex object is used so EST
 * partitions can be unloaded. */
{
struct gbIndex* estIdx = gbIndexNew(NULL, gGbRoot);
struct gbRelease* estRel = gbIndexMustFindRelease(estIdx, mrnaRelease->name);
struct gbSelect* partList;
struct gbSelect* part;

partList = gbIndexGetPartitions(estIdx, GB_PROCESSED, GB_GENBANK,
                                estRel->name,
                                GB_EST, GB_NATIVE|GB_XENO, NULL);
part = partList;
while (part != NULL)
    {
    checkEstPartition(mrnaRelease, part);
    part = part->next;
    }

/* drop the partition list, then the private EST index */
slFreeList(&partList);
gbIndexFree(&estIdx);
}
void gbLoadRna(char* reloadList)
/* Sync the database with the state in the genbank repository.
 *
 * reloadList may be NULL; when it is not, it is handed to gbReloadDelete
 * before any partition is loaded (skipped on a dry run).
 * NOTE(review): presumably names the entries to reload - confirm the
 * expected format against gbReloadDelete.
 *
 * The database gDatabase is locked with gbLockDb for the duration of the
 * load and unlocked before returning.  Aborts if -reload is combined with a
 * dry run, if an initial load finds nothing to load, or, after all work is
 * finished, if gMaxShrinkageError was set. */
{
struct gbIndex* index = gbIndexNew(gDatabase, NULL);
struct gbSelect* selectList, *select;
struct sqlConnection* conn;

/* must go through all tables if any reload is selected,
 * extFile update is requested, or rebuilding derived */
/* NOTE(review): only reloadList and gReload are tested here; the extFile
 * update and derived-rebuild cases named above are not checked in this
 * function - confirm they set DBLOAD_BYPASS_GBLOADED elsewhere */
if ((reloadList != NULL) || gReload)
    gOptions.flags |= DBLOAD_BYPASS_GBLOADED;

if (gReload && (gOptions.flags & DBLOAD_DRY_RUN))
    errAbort("can't specify both -reload and -dryRun");

gbVerbEnter(1, "gbLoadRna");
conn = hAllocConn(gDatabase);
gbLockDb(conn, NULL);

if (gOptions.flags & DBLOAD_INITIAL)
    checkInitialLoad(conn);

/* delete anything on the reload list up front */
if (((gOptions.flags & DBLOAD_DRY_RUN) == 0) && (reloadList != NULL))
    {
    gbAlignDataInit(gWorkDir, &gOptions, conn);
    gbReloadDelete(gDatabase, reloadList, &gOptions, gWorkDir);
    }

/* get the partitions to load; an initial load with nothing to load is
 * treated as an error */
selectList = dbLoadPartitionsGet(&gOptions, index);
if ((gOptions.flags & DBLOAD_INITIAL) && (selectList == NULL))
    errAbort("-initialLoad specified and no sequences were found to load");

/* clean up any ignored entries before setting anything up */
gbVerbEnter(3, "delete ignored");
gbIgnoredDelete(gDatabase, selectList, gForceIgnoreDelete, &gOptions, gWorkDir);
gbVerbLeave(3, "delete ignored");

/* loaded table to track updates that have been processed */
gLoadedTbl = gbLoadedTblNew(conn);

/* load each partition */
for (select = selectList; select != NULL; select = select->next)
    loadPartition(select, conn);

/* If we are delaying table load, now is the time */
if ((gOptions.flags & DBLOAD_INITIAL)
    && ((gOptions.flags & DBLOAD_DRY_RUN) == 0))
    loadDelayedTables();

/* clean up extFile table if we change references for any seq */
if ((gOptions.flags & DBLOAD_EXT_FILE_UPDATE) && ((gOptions.flags & DBLOAD_DRY_RUN) == 0))
    cleanExtFileTable();

/* clean up */
/* NOTE(review): index from gbIndexNew is not freed here - confirm whether
 * that is intentional */
slFreeList(&selectList);
gbMetaDataFree();
gbLoadedTblFree(&gLoadedTbl);
gbUnlockDb(conn, NULL);
hFreeConn(&conn);

/* must go to stderr to be logged */
gbVerbLeave(1, "gbLoadRna");
/* the shrinkage error is only raised here, after the database has been
 * unlocked and the connection freed */
if (gMaxShrinkageError)
    errAbort("Stoping due to maxShrinkage limit being exceeded in one or more\n"
             "partitions. Investigate and rerun with -allowLargeDeletes.");
}