struct gbRelease* loadIndex(char* relName, unsigned types, char* database) /* load processed section of index for release */ { struct gbIndex* index = gbIndexNew(database, NULL); struct gbSelect select; ZeroVar(&select); select.release = gbIndexMustFindRelease(index, relName); if (types & GB_MRNA) { select.type = GB_MRNA; gbReleaseLoadProcessed(&select); } if ((types & GB_EST) && (select.release->srcDb == GB_GENBANK)) { struct slName* prefixes, *prefix; select.type = GB_EST; prefixes = gbReleaseGetAccPrefixes(select.release, GB_PROCESSED, GB_EST); for (prefix = prefixes; prefix != NULL; prefix = prefix->next) { select.accPrefix = prefix->name; gbReleaseLoadProcessed(&select); } select.accPrefix = NULL; slFreeList(&prefixes); } return select.release; }
int main(int argc, char* argv[]) { char *relName, *updateName, *typeAccPrefix, *database, *sep; struct gbIndex* index; struct gbSelect select; struct gbSelect* prevSelect = NULL; struct gbAlignInfo alignInfo; boolean noMigrate; ZeroVar(&select); optionInit(&argc, argv, optionSpecs); if (argc != 5) usage(); maxFaSize = optionInt("fasize", -1); workDir = optionVal("workdir", "work/align"); noMigrate = optionExists("noMigrate"); createPolyASizes = optionExists("polyASizes"); gbVerbInit(optionInt("verbose", 0)); relName = argv[1]; updateName = argv[2]; typeAccPrefix = argv[3]; database = argv[4]; /* parse typeAccPrefix */ sep = strchr(typeAccPrefix, '.'); if (sep != NULL) *sep = '\0'; select.type = gbParseType(typeAccPrefix); if (sep != NULL) { select.accPrefix = sep+1; *sep = '.'; } select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno")); index = gbIndexNew(database, NULL); select.release = gbIndexMustFindRelease(index, relName); select.update = gbReleaseMustFindUpdate(select.release, updateName); gbVerbMsg(0, "gbAlignGet: %s/%s/%s/%s", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix); /* Get the release to migrate, if applicable */ if (!noMigrate) prevSelect = gbAlignGetMigrateRel(&select); alignInfo = gbAlignGet(&select, prevSelect); /* always print stats */ fprintf(stderr, "gbAlignGet: %s/%s/%s/%s: align=%d, migrate=%d\n", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix, alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt); gbIndexFree(&index); /* print alignment and migrate count, which is read by the driver program */ printf("alignCnt: %d %d\n", alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt); return 0; }
int main(int argc, char* argv[]) { char *relName, *updateName, *typeAccPrefix, *database, *sep; struct gbIndex* index; struct gbSelect select; struct gbSelect* prevSelect = NULL; boolean noMigrate; ZeroVar(&select); optionInit(&argc, argv, optionSpecs); if (argc != 5) usage(); gWorkDir = optionVal("workdir", "work/align"); gSortTmp = optionVal("sortTmp", NULL); noMigrate = optionExists("noMigrate"); gbVerbInit(optionInt("verbose", 0)); relName = argv[1]; updateName = argv[2]; typeAccPrefix = argv[3]; database = argv[4]; /* parse typeAccPrefix */ sep = strchr(typeAccPrefix, '.'); if (sep != NULL) *sep = '\0'; select.type = gbParseType(typeAccPrefix); if (sep != NULL) { select.accPrefix = sep+1; *sep = '.'; } index = gbIndexNew(database, NULL); select.release = gbIndexMustFindRelease(index, relName); select.update = gbReleaseMustFindUpdate(select.release, updateName); select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno")); gbVerbMsg(0, "gbAlignInstall: %s/%s/%s/%s", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix); /* Get the release to migrate, if applicable */ if (!noMigrate) prevSelect = gbAlignGetMigrateRel(&select); gbAlignInstall(&select, prevSelect); /* must go to stderr to be logged */ gbVerbMsg(0, "gbAlignInstall: complete"); gbIndexFree(&index); return 0; }
void gbProcessedCheck() /* do processed sanity checks on newest genbank/refseq releases */ { struct gbIndex* index = gbIndexNew(NULL, gGbRoot); struct gbSelect* partitions, *select; /* get mRNA data only, ESTs are compared against mRNAs for type change */ partitions = gbIndexGetPartitions(index, GB_PROCESSED, GB_GENBANK|GB_REFSEQ, NULL, GB_MRNA, GB_NATIVE|GB_XENO, NULL); for (select = partitions; select != NULL; select = select->next) checkMrnaPartition(select); gbIndexFree(&index); }
int main(int argc, char* argv[]) { int argi; char* dumpFile = NULL; unsigned flags = 0; char* database, *accPrefix; struct gbIndex* index; struct stepInfo runInfo; gbVerbInit(0); optionInit(&argc, argv, optionSpecs); if (argc < 2) usage(); if (optionExists("processed")) flags |= DO_PROCESSED; if (optionExists("aligned")) flags |= DO_PROCESSED|DO_ALIGNED; if (optionExists("mrna")) flags |= DO_MRNA; if (optionExists("est")) flags |= DO_EST; dumpFile = optionVal("dump", NULL); database = optionVal("db", NULL); accPrefix = optionVal("accPrefix", NULL); if ((flags & DO_ALIGNED) && (database == NULL)) errAbort("must specify -db with -aligned"); if (!(flags & (DO_MRNA|DO_EST))) errAbort("must specify at least one of -mrna or -est"); if (!(flags & (DO_ALIGNED|DO_PROCESSED))) errAbort("must specify at least one of -processed or -aligned"); index = gbIndexNew(database, NULL); runInfo = beginStep(index, NULL, "loading index files"); for (argi = 1; argi < argc; argi++) testRelLoad(index, gbIndexMustFindRelease(index, argv[argi]), database, flags, accPrefix); if (dumpFile != NULL) { FILE* dumpOut = mustOpen(dumpFile, "w"); gbIndexDump(index, dumpOut); if (fclose(dumpOut) != 0) errnoAbort("close of dumpfile"); } endStep(index, &runInfo); gbIndexFree(&index); return 0; }
static void gbSanity(char* database) /* Run sanity checks */ { struct gbIndex* index = gbIndexNew(database, NULL); struct sqlConnection *conn; struct gbRelease* release; gbErrorSetDb(database); int checkedSetCnt = 0; if (gOptions.relRestrict == NULL) { int releaseCnt = 0; /* Check each partition of the genbank/refseq using the newest aligned * release */ release = newestReleaseWithAligns(index, database, GB_GENBANK); if (release != NULL) { releaseCnt++; checkedSetCnt += releaseSanity(release, database); } release = newestReleaseWithAligns(index, database, GB_REFSEQ); if (release != NULL) { releaseCnt++; checkedSetCnt += releaseSanity(release, database); } if (releaseCnt == 0) errAbort("Error: No RefSeq or Genbank alignments for %s\n" "have updates been enabled?", database); } else { release = gbIndexMustFindRelease(index, gOptions.relRestrict); checkedSetCnt += releaseSanity(release, database); } if (checkedSetCnt == 0) errAbort("Error: no alignment data was checked"); verbose(1, "%d alignment sets checked\n", checkedSetCnt); gbIndexFree(&index); /* check of uniqueStr ids */ conn = hAllocConn(database); checkGbCdnaInfoStrKeys(conn); hFreeConn(&conn); }
void checkEstPartitions(struct gbRelease* mrnaRelease) /* Check EST partitions, this compares against mRNA entries * for changed type. Separate gbIndex objects are used so EST * partitions can be unloaded. */ { struct gbIndex* estIndex = gbIndexNew(NULL, gGbRoot); struct gbRelease* estRelease = gbIndexMustFindRelease(estIndex, mrnaRelease->name); struct gbSelect* partitions, *select; partitions = gbIndexGetPartitions(estIndex, GB_PROCESSED, GB_GENBANK, estRelease->name, GB_EST, GB_NATIVE|GB_XENO, NULL); for (select = partitions; select != NULL; select = select->next) checkEstPartition(mrnaRelease, select); slFreeList(&partitions); gbIndexFree(&estIndex); }
void gbLoadRna(char* reloadList) /* Sync the database with the state in the genbank respository. */ { struct gbIndex* index = gbIndexNew(gDatabase, NULL); struct gbSelect* selectList, *select; struct sqlConnection* conn; /* must go through all tables if any reload is selected, * extFile update is requested, or rebuilding derived */ if ((reloadList != NULL) || gReload) gOptions.flags |= DBLOAD_BYPASS_GBLOADED; if (gReload && (gOptions.flags & DBLOAD_DRY_RUN)) errAbort("can't specify both -reload and -dryRun"); gbVerbEnter(1, "gbLoadRna"); conn = hAllocConn(gDatabase); gbLockDb(conn, NULL); if (gOptions.flags & DBLOAD_INITIAL) checkInitialLoad(conn); /* delete anything on the reload list up front */ if (((gOptions.flags & DBLOAD_DRY_RUN) == 0) && (reloadList != NULL)) { gbAlignDataInit(gWorkDir, &gOptions, conn); gbReloadDelete(gDatabase, reloadList, &gOptions, gWorkDir); } selectList = dbLoadPartitionsGet(&gOptions, index); if ((gOptions.flags & DBLOAD_INITIAL) && (selectList == NULL)) errAbort("-initialLoad specified and no sequences were found to load"); /* clean up any ignored entries before setting anything up */ gbVerbEnter(3, "delete ignored"); gbIgnoredDelete(gDatabase, selectList, gForceIgnoreDelete, &gOptions, gWorkDir); gbVerbLeave(3, "delete ignored"); /* loaded table to track updates that have been processed */ gLoadedTbl = gbLoadedTblNew(conn); /* load each partition */ for (select = selectList; select != NULL; select = select->next) loadPartition(select, conn); /* If we are delaying table load, now is the time */ if ((gOptions.flags & DBLOAD_INITIAL) && ((gOptions.flags & DBLOAD_DRY_RUN) == 0)) loadDelayedTables(); /* clean up extFile table if we change references for any seq */ if ((gOptions.flags & DBLOAD_EXT_FILE_UPDATE) && ((gOptions.flags & DBLOAD_DRY_RUN) == 0)) cleanExtFileTable(); /* clean up */ slFreeList(&selectList); gbMetaDataFree(); gbLoadedTblFree(&gLoadedTbl); gbUnlockDb(conn, NULL); hFreeConn(&conn); /* must go to stderr to be logged */ gbVerbLeave(1, "gbLoadRna"); if (gMaxShrinkageError) errAbort("Stoping due to maxShrinkage limit being exceeded in one or more\n" "partitions. Investigate and rerun with -allowLargeDeletes."); }