struct gbAlignInfo gbAlignGet(struct gbSelect* select, struct gbSelect* prevSelect) /* Build files to align in the work directory. If this is not a full release, * or there is no previously aligned release, prevSelect should be NULL. */ { struct gbAlignInfo alignInfo; gbVerbEnter(1, "gbAlignGet: %s", gbSelectDesc(select)); if (prevSelect != NULL) prevSelect->orgCats = select->orgCats; /* load the required entry data */ gbReleaseLoadProcessed(select); if (prevSelect != NULL) { gbReleaseLoadProcessed(prevSelect); gbReleaseLoadAligned(prevSelect); } /* select entries to align */ gbVerbEnter(2, "selecting seqs to align"); alignInfo = gbAlignFindNeedAligned(select, prevSelect); gbVerbLeave(2, "selecting seqs to align"); if (alignInfo.migrate.accTotalCnt > 0) gbVerbMsg(1, "gbAlignGet: %d %s entries, %d alignments will be migrated", alignInfo.migrate.accTotalCnt, gbFmtSelect(select->type), alignInfo.migrate.recTotalCnt); /* create fasta with sequences to align if not empty */ if (alignInfo.align.accTotalCnt > 0) { gbVerbMsg(1, "gbAlignGet: %d %s sequences will be align", alignInfo.align.accTotalCnt, gbFmtSelect(select->type)); copySelectedFasta(select); } /* leave calling cards */ if (select->orgCats & GB_NATIVE) markAligns(select, GB_NATIVE); if (select->orgCats & GB_XENO) markAligns(select, GB_XENO); /* print before releasing memory */ gbVerbLeave(1, "gbAlignGet: %s", gbSelectDesc(select)); /* unload entries to free memory */ gbReleaseUnload(select->release); if (prevSelect != NULL) gbReleaseUnload(prevSelect->release); return alignInfo; }
void processMetaData(struct sqlConnection *conn, struct gbSelect* select, struct gbStatusTbl* statusTbl, char* tmpDir) /* Parse and load the metadata */ { struct gbUpdate* update; gbVerbEnter(3, "processing metadata"); gbMetaDataInit(conn, select->release->srcDb, &gOptions, gGbdbGenBank, tmpDir); for (update = select->release->updates; update != NULL; update = update->next) { if (update->selectProc) { select->update = update; parseUpdateMetaData(conn, select, statusTbl); } } select->update = NULL; gbVerbLeave(3, "processing metadata"); if (!(gOptions.flags & DBLOAD_INITIAL)) { gbMetaDataUpdateChgGenes(conn, select, statusTbl, tmpDir); loadMetaData(conn); } }
void loadAligns(struct sqlConnection *conn)
/* Load the pending alignments through gbAlignDataDbLoad, bracketed by
 * level 3 verbose enter/leave messages. */
{
gbVerbEnter(3, "load alignments");
gbAlignDataDbLoad(conn);
gbVerbLeave(3, "load alignments");
}
void loadMetaData(struct sqlConnection *conn)
/* Load the metadata into the database through gbMetaDataDbLoad, bracketed
 * by level 3 verbose enter/leave messages. */
{
gbVerbEnter(3, "loading metadata");
gbMetaDataDbLoad(conn);
gbVerbLeave(3, "loading metadata");
}
void checkMrnaPartition(struct gbSelect* select) /* Check an mRNA partition. For genbank, check all ESTs against * this mRNA partation. */ { struct hashCookie cookie; struct hashEl* hel; gbReleaseLoadProcessed(select); struct gbSelect* prevSelect = gbProcessedGetPrevRel(select); if (prevSelect != NULL) gbReleaseLoadProcessed(prevSelect); gbVerbEnter(2, "checking %s", gbSelectDesc(select)); cookie = hashFirst(select->release->entryTbl); while ((hel = hashNext(&cookie)) != NULL) checkOrgCat(hel->val, prevSelect); gbVerbLeave(2, "checking %s", gbSelectDesc(select)); if (select->release->srcDb == GB_GENBANK) checkEstPartitions(select->release); gbReleaseUnload(select->release); if (prevSelect != NULL) { gbReleaseUnload(prevSelect->release); freeMem(prevSelect); } }
void deleteOutdated(struct sqlConnection *conn, struct gbSelect* select, struct gbStatusTbl* statusTbl, char* tmpDir) /* delete outdated alignments and metadata from the database. */ { gbVerbEnter(3, "delete outdated"); /* first the alignments */ gbVerbMsg(4, "delete outdated alignments"); gbAlignDataDeleteOutdated(gDatabase, conn, select, statusTbl, &gOptions, tmpDir); /* now drop metadata entries */ gbVerbMsg(4, "delete outdated metadata"); gbMetaDataDeleteOutdated(conn, select, statusTbl, &gOptions, tmpDir); /* Now it's safe to drop deleted entries from the database status table. */ gbVerbMsg(4, "delete outdated gbStatus"); gbStatusTblRemoveDeleted(statusTbl, conn); /* orphaned now become new */ statusTbl->newList = slCat(statusTbl->newList, statusTbl->orphanList); statusTbl->orphanList = NULL; statusTbl->numNew += statusTbl->numOrphan; statusTbl->numOrphan = 0; gbVerbLeave(3, "delete outdated"); }
void loadMgcStatus(struct sqlConnection *conn, char *mgcStatusTab, char *statusTblName) /* load the mgcStatus or mgcFullStatus tables, return name loaded */ { struct lineFile* inLf; FILE *outFh; char tmpFile[PATH_LEN]; gbVerbEnter(2, "loading %s", statusTblName); /* uncompress to tmp file */ safef(tmpFile, sizeof(tmpFile), "%s/mgcStatus.%s.%d.tmp", workDir, getHost(), getpid()); inLf = gzLineFileOpen(mgcStatusTab); outFh = gzMustOpen(tmpFile, "w"); while (mgcStatusTblCopyRow(inLf, outFh)) continue; gzClose(&outFh); gzLineFileClose(&inLf); mgcStatusTblCreate(conn, statusTblName); sqlLoadTabFile(conn, tmpFile, statusTblName, SQL_TAB_FILE_ON_SERVER); unlink(tmpFile); gbVerbLeave(2, "loading %s", statusTblName); }
struct metaDataTbls* chkMetaDataTbls(struct gbSelect* select,
                                     struct sqlConnection* conn,
                                     boolean checkExtSeqRecs,
                                     unsigned descOrgCats,
                                     char* gbdbMapToCurrent)
/* Load the metadata tables, doing basic validation, and return the newly
 * created metaDataTbls.  descOrgCats are the orgCats that should have
 * descriptions. */
{
struct metaDataTbls* tbls;

gbVerbEnter(1, "load and check metadata tables: %s", gbSelectDesc(select));
tbls = metaDataTblsNew();

/* the load order matters: it allows checks between tables */
loadGbCdnaInfoData(tbls, select, conn);
if (select->release->srcDb == GB_REFSEQ)
    {
    /* these must precede the seq data because of the protein checks */
    loadRefSeqStatus(tbls, conn);
    loadRefLink(tbls, conn);
    }
loadSeqData(tbls, select, conn, checkExtSeqRecs, gbdbMapToCurrent);
loadGbStatus(tbls, select, descOrgCats, conn);

gbVerbLeave(1, "load and check metadata tables: %s", gbSelectDesc(select));
return tbls;
}
void migratePsls(struct migrateAligns* migrate, unsigned pslFileType, struct gbEntryCnts* counts, FILE* outPslFh) /* Migrate selected PSL records */ { char inPsl[PATH_LEN]; struct lineFile* inPslLf; struct psl* psl; gbAlignedGetPath(migrate->prevSelect, gPslFileGzExt[pslFileType], NULL, inPsl); /* It's possible to end up here and not have a file if none of the sequences * aligned */ if (fileExists(inPsl)) { gbVerbEnter(2, "migrating %ss from %s", gPslFileExt[pslFileType], inPsl); inPslLf = gzLineFileOpen(inPsl); while ((psl = pslNext(inPslLf)) != NULL) { migratePsl(migrate, pslFileType, counts, psl, inPsl, outPslFh); pslFree(&psl); } gzLineFileClose(&inPslLf); gbVerbLeave(2, "migrating %ss from %s", gPslFileExt[pslFileType], inPsl); } }
void createMgcFailedEst(struct sqlConnection *conn)
/* Create the temporary version of the mgcFailedEst table and fill it from
 * all_est. */
{
char query[1024];
char tmpName[32];

tblBldGetTmpName(tmpName, sizeof(tmpName), MGC_FAILED_EST_TBL);
gbVerbEnter(2, "loading %s", tmpName);
tblBldRemakePslTable(conn, tmpName, "all_est");

/* Insert the all_est rows that join, through imageClone, to mgcStatus_tmp
 * rows in the MGC_STATE_PROBLEM state.  Only 5' ESTs are taken.  An empty
 * acc in mgcStatus indicates an imageClone id that has not been
 * sequenced. */
safef(query, sizeof(query),
      "INSERT INTO %s"
      " SELECT all_est.* FROM all_est, mgcStatus_tmp, imageClone"
      " WHERE (all_est.qName = imageClone.acc)"
      " AND (imageClone.direction = '5')"
      " AND (mgcStatus_tmp.imageId = imageClone.imageId)"
      " AND (mgcStatus_tmp.acc = '')"
      " AND (mgcStatus_tmp.state = %d)",
      tmpName, MGC_STATE_PROBLEM);
sqlUpdate(conn, query);

gbVerbLeave(2, "loading %s", tmpName);
}
int main(int argc, char *argv[]) /* Process command line. */ { char *database; optionInit(&argc, argv, optionSpecs); if (argc != 2) usage(); if (optionExists("gbdbCurrent")) { char* p; gGbdbMapToCurrent = optionVal("gbdbCurrent", NULL); /* trim trailing slashes */ for (p = gGbdbMapToCurrent + (strlen(gGbdbMapToCurrent)-1); ((p > gGbdbMapToCurrent) && (*p == '/')); p--) *p = '\0'; } gbVerbInit(optionInt("verbose", 0)); if (gbVerbose >= 5) sqlMonitorEnable(JKSQL_TRACE); database = argv[1]; gOptions = dbLoadOptionsParse(database); testMode = optionExists("test"); gCheckExtSeqRecs = optionExists("checkExtSeqRecs"); gbVerbEnter(0, "gbSanity: begin: %s", database); gbSanity(database); gbVerbLeave(0, "gbSanity: completed: %d errors", errorCnt); return ((errorCnt == 0) ? 0 : 1); }
void migrateAligned(struct gbSelect* select, struct gbSelect* prevSelect, struct gbAlignInfo* alignInfo, struct outputFiles* out, struct recCounts* recCounts) /* Migrate existing aligned PSLs from an earlier release. */ { int orgCatIdx = gbOrgCatIdx(select->orgCats); struct gbUpdate* prevUpdateHold = prevSelect->update; struct gbUpdate* prevUpdate; struct migrateAligns migrate; ZeroVar(&migrate); migrate.select = select; migrate.prevSelect = prevSelect; /* traverse all updates in the previous release */ gbVerbEnter(1, "migrating alignments"); for (prevUpdate = prevSelect->release->updates; prevUpdate != NULL; prevUpdate = prevUpdate->next) { prevSelect->update = prevUpdate; migrateAlignedUpdate(prevSelect, &migrate, out, recCounts); } prevSelect->update = prevUpdateHold; recCountsSum(recCounts, &migrate.counts); if (migrate.counts.pslCnts.recCnt[orgCatIdx] != alignInfo->migrate.recCnt[orgCatIdx]) errAbort("expected to migrate %d %s PSLs, found %d", alignInfo->migrate.recCnt[orgCatIdx], gbOrgCatName(select->orgCats), migrate.counts.pslCnts.recCnt[orgCatIdx]); gbVerbLeave(1, "migrating alignments"); }
void createMgcUnpickedEst(struct sqlConnection *conn)
/* Create the temporary version of the mgcUnpickedEst table and fill it
 * from all_est. */
{
char query[1024];
char tmpName[32];

tblBldGetTmpName(tmpName, sizeof(tmpName), MGC_UNPICKED_EST_TBL);
gbVerbEnter(2, "loading %s", tmpName);
tblBldRemakePslTable(conn, tmpName, "all_est");

/* Insert the all_est rows that join, through imageClone, to mgcStatus_tmp
 * rows in the MGC_STATE_UNPICKED state.  Only 5' ESTs are taken.  An empty
 * acc in mgcStatus indicates an imageClone id that has not been
 * sequenced. */
safef(query, sizeof(query),
      "INSERT INTO %s"
      " SELECT all_est.* FROM all_est, mgcStatus_tmp, imageClone"
      " WHERE (all_est.qName = imageClone.acc)"
      " AND (imageClone.direction = '5')"
      " AND (mgcStatus_tmp.imageId = imageClone.imageId)"
      " AND (mgcStatus_tmp.acc = '')"
      " AND (mgcStatus_tmp.state = %d)",
      tmpName, MGC_STATE_UNPICKED);
sqlUpdate(conn, query);

gbVerbLeave(2, "loading %s", tmpName);
}
void copySelectedFasta(struct gbSelect* select) /* copy FASTA records that were selected for alignment, segregating by * native/xeno, and partitioning large files. */ { char inFasta[PATH_LEN]; struct gbFa* inFa; struct outFa* nativeFa = NULL; struct outFa* xenoFa = NULL; if (select->orgCats & GB_NATIVE) nativeFa = outFaNew(select, GB_NATIVE); if (select->orgCats & GB_XENO) xenoFa = outFaNew(select, GB_XENO); gbProcessedGetPath(select, "fa", inFasta); gbVerbEnter(2, "copying from %s", inFasta); inFa = gbFaOpen(inFasta, "r"); while (copyFastaRec(select, inFa, nativeFa, xenoFa)) continue; outFaFree(&nativeFa); outFaFree(&xenoFa); gbFaClose(&inFa); gbVerbLeave(2, "copying from %s", inFasta); }
void moveAll(char *srcDb, char* destDb) /* Rename all gbLoadRna tables from database to another. */ { struct slName *tables, *tbl; struct sqlConnection *conn; struct dyString* sqlCmd = dyStringNew(256); char *sep; gbVerbEnter(1, "moveAll"); conn = hAllocConn(srcDb); gbLockDb(conn, srcDb); gbLockDb(conn, destDb); copyChromInfo(conn, destDb); /* using one does rename atomically */ tables = getTableList(conn); dyStringAppend(sqlCmd, "rename table"); sep = " "; /* before first table arg */ for (tbl = tables; tbl != NULL; tbl = tbl->next) { dyStringPrintf(sqlCmd, "%s%s to %s.%s", sep,tbl->name, destDb, tbl->name); sep = ", "; /* before other table arg */ } sqlUpdate(conn, sqlCmd->string); dyStringFree(&sqlCmd); slFreeList(&tables); gbUnlockDb(conn, destDb); gbUnlockDb(conn, srcDb); hFreeConn(&conn); gbVerbLeave(1, "moveAll"); }
void copyAll(char *srcDb, char* destDb) /* Copy all gbLoadRna tables from database to another. */ { struct slName *tables, *tbl; struct sqlConnection *conn; gbVerbEnter(1, "copyAll"); conn = hAllocConn(srcDb); gbLockDb(conn, srcDb); gbLockDb(conn, destDb); copyChromInfo(conn, destDb); /* copy each table */ tables = getTableList(conn); for (tbl = tables; tbl != NULL; tbl = tbl->next) { copyTable(conn, destDb, tbl->name, tbl->name); } slFreeList(&tables); gbUnlockDb(conn, destDb); gbUnlockDb(conn, srcDb); hFreeConn(&conn); gbVerbLeave(1, "copyAll"); }
void cleanExtFileTable()
/* Clean up the extFile table, in case the references for any seq were
 * changed.  Uses its own connection to gDatabase. */
{
struct sqlConnection *conn;

conn = hAllocConn(gDatabase);
gbVerbEnter(3, "cleaning extFileTbl");
/* the second argument is true only at verbosity 4 and above */
extFileTblClean(conn, (gbVerbose >= 4));
gbVerbLeave(3, "cleaning extFileTbl");
hFreeConn(&conn);
}
void createMgcGenes(struct sqlConnection *conn)
/* Create the temporary mgcGenes table from the temporary mgcFullMrna
 * table using tblBldGenePredFromPsl. */
{
char tmpMrnaTbl[32];
char tmpGeneTbl[32];

tblBldGetTmpName(tmpGeneTbl, sizeof(tmpGeneTbl), MGC_GENES_TBL);
tblBldGetTmpName(tmpMrnaTbl, sizeof(tmpMrnaTbl), MGC_FULL_MRNA_TBL);

gbVerbEnter(2, "loading %s", tmpGeneTbl);
tblBldGenePredFromPsl(conn, workDir, tmpMrnaTbl, tmpGeneTbl, stderr);
gbVerbLeave(2, "loading %s", tmpGeneTbl);
}
void mgcDropTables(char *database)
/* Drop all MGC-related tables: the real, tmp and old variants of both the
 * mgcFullTables and mgcAllTables sets. */
{
/* every table variant is dropped for both table sets */
unsigned allKinds = TBLBLD_REAL_TABLE|TBLBLD_TMP_TABLE|TBLBLD_OLD_TABLE;
struct sqlConnection *conn;

conn = hAllocConn(database);
gbVerbEnter(1, "droping MGC tables");
tblBldDropTables(conn, mgcFullTables, allKinds);
tblBldDropTables(conn, mgcAllTables, allKinds);
hFreeConn(&conn);
gbVerbLeave(1, "droping MGC tables");
}
void parseUpdateMetaData(struct sqlConnection *conn, struct gbSelect* select,
                         struct gbStatusTbl* statusTbl)
/* Parse the metadata for the changed and new entries of one update
 * (select->update).  Updates are handled one at a time so that the ra file,
 * of which there is one per update, can be read in sequential order.  This
 * doesn't load the mrna or seq tables, but might add to the unique string
 * tables. */
{
gbVerbEnter(4, "process metadata for %s", gbSelectDesc(select));

gbMetaDataProcess(conn, statusTbl, select);
gbUpdateClearSelectVer(select->update);

gbVerbLeave(4, "process metadata for %s", gbSelectDesc(select));
}
void chkMetaDataXRef(struct metaDataTbls* metaDataTbls)
/* Verify that data referenced in some tables is present in all of the
 * expected tables, by running checkXRef on every metadata entry.  Called
 * after processing all indices. */
{
struct metaData* entry;

gbVerbEnter(1, "cross check metadata");
metaDataTblsFirst(metaDataTbls);
for (entry = metaDataTblsNext(metaDataTbls); entry != NULL;
     entry = metaDataTblsNext(metaDataTbls))
    {
    checkXRef(entry);
    }
gbVerbLeave(1, "cross check metadata");
}
static void chkGbRelease(struct gbSelect* select,
                         struct metaDataTbls* metaDataTbls)
/* Check one partition of a gbRelease against the metadata tables. */
{
gbVerbEnter(1, "check: %s", gbSelectDesc(select));

/* load the required entry data */
gbReleaseLoadProcessed(select);
gbReleaseLoadAligned(select);

chkGbIndex(select, metaDataTbls);

/* unload the entries to free memory */
gbReleaseUnload(select->release);

gbVerbLeave(1, "check: %s", gbSelectDesc(select));
}
static void chkAlignCounts(struct metaDataTbls* metaDataTbls,
                           char* dbTableDesc, unsigned typeFlags)
/* Check the alignment counts found in the tables by running chkAlignCount
 * on every metadata entry; the counts are also reset.  typeFlags includes
 * GB_NATIVE or GB_XENO. */
{
struct metaData* entry;

gbVerbEnter(3, "chkAlignCounts %s", dbTableDesc);

/* go over all metadata entries, comparing the number of alignments with
 * gbStatus */
metaDataTblsFirst(metaDataTbls);
for (entry = metaDataTblsNext(metaDataTbls); entry != NULL;
     entry = metaDataTblsNext(metaDataTbls))
    {
    chkAlignCount(entry, metaDataTbls, dbTableDesc, typeFlags);
    }

gbVerbLeave(3, "chkAlignCounts %s", dbTableDesc);
}
static void chkGenePredTable(struct gbSelect* select,
                             struct sqlConnection* conn, char* table,
                             boolean isRefFlat,
                             struct metaDataTbls* metaDataTbls,
                             unsigned typeFlags)
/* Validate a genePred table, also counting the number of genePreds for an
 * mrna.  If this is refFlat, the geneName is checked as well.  A missing
 * table is reported through gbError.  Nothing is returned. */
{
gbVerbEnter(3, "chkGenePredTable %s", table);

if (sqlTableExists(conn, table))
    {
    chkGenePredRows(select, conn, table, isRefFlat, metaDataTbls, typeFlags);
    }
else
    {
    gbError("no genePred table %s.%s", select->release->genome->database,
            table);
    }

gbVerbLeave(3, "chkGenePredTable %s", table);
}
void doLoadPartition(struct gbSelect* select)
/* Do the work of syncing the database with the state in the genbank
 * repository for a given partition. */
{
gbVerbEnter(2, "load for %s", gbSelectDesc(select));

/* load the required entry data */
gbReleaseLoadProcessed(select);
gbReleaseLoadAligned(select);

databaseUpdate(select);

/* the leave message is printed before the entries are unloaded */
gbVerbLeave(2, "load for %s", gbSelectDesc(select));

/* unload the entries to free memory */
gbReleaseUnload(select->release);
}
void dropAll(char *database)
/* Drop all gbLoadRna tables from the database.  The database is locked
 * while the tables are dropped. */
{
struct slName *tableList;
struct slName *cur;
struct sqlConnection *conn;

gbVerbEnter(1, "dropAll");
conn = hAllocConn(database);
gbLockDb(conn, NULL);

tableList = getTableList(conn);
cur = tableList;
while (cur != NULL)
    {
    sqlDropTable(conn, cur->name);
    cur = cur->next;
    }
slFreeList(&tableList);

gbUnlockDb(conn, NULL);
hFreeConn(&conn);
gbVerbLeave(1, "dropAll");
}
void processUpdateAligns(struct sqlConnection *conn, struct gbSelect* select, struct gbUpdate* update, struct gbStatusTbl* statusTbl) /* Get alignements for an update. */ { select->update = update; gbVerbEnter(4, "process alignments: %s", gbSelectDesc(select)); if (select->orgCats & GB_NATIVE) processUpdateAlignsForOrgCat(conn, select, GB_NATIVE, statusTbl); if (select->orgCats & GB_XENO) processUpdateAlignsForOrgCat(conn, select, GB_XENO, statusTbl); gbUpdateClearSelectVer(select->update); gbVerbLeave(4, "process alignments: %s", gbSelectDesc(select)); select->update = NULL; }
void mgcDbLoad(char *database, char *mgcStatusTabFile) /* Load the database with the MGC tables. */ { gbVerbEnter(1, "Loading MGC tables"); struct sqlConnection *conn = hAllocConn(database); buildMgcTbls(conn, mgcStatusTabFile); installMgcTbls(conn); /* Drop tables only on *OTHER* type of browser, in case switching */ tblBldDropTables(conn, ((allMgcTables) ? mgcFullOnlyTables : mgcAllOnlyTables), TBLBLD_REAL_TABLE); /* Now get ride of old and do tmp as well, in case of switching browser * type */ tblBldDropTables(conn, mgcFullTables, TBLBLD_TMP_TABLE|TBLBLD_OLD_TABLE); tblBldDropTables(conn, mgcAllTables, TBLBLD_TMP_TABLE|TBLBLD_OLD_TABLE); hFreeConn(&conn); gbVerbLeave(1, "Loading MGC tables"); }
void gbAlignInstall(struct gbSelect* select, struct gbSelect* prevSelect) /* Install alignments, optionally migrating unchanged ones from a previous * release. This does one update, accPrefix and either native or xeno */ { char nativeAlignIdx[PATH_LEN], xenoAlignIdx[PATH_LEN]; struct gbAlignInfo alignInfo; gbVerbEnter(1, "gbAlignInstall: %s", gbSelectDesc(select)); /* load required entry date */ gbReleaseLoadProcessed(select); if (prevSelect != NULL) { gbReleaseLoadProcessed(prevSelect); gbReleaseLoadAligned(prevSelect); } /* mark entries and updates to migrate or align */ alignInfo = gbAlignFindNeedAligned(select, prevSelect); /* Process each category */ if (select->orgCats & GB_NATIVE) installOrgCatAligned(select, GB_NATIVE, prevSelect, &alignInfo, nativeAlignIdx); if (select->orgCats & GB_XENO) installOrgCatAligned(select, GB_XENO, prevSelect, &alignInfo, xenoAlignIdx); /* now indices can be renamed, not completely atomic, but good enough */ if (select->orgCats & GB_NATIVE) gbOutputRename(nativeAlignIdx, NULL); if (select->orgCats & GB_XENO) gbOutputRename(xenoAlignIdx, NULL); /* print message before memory is freed */ gbVerbLeave(1, "gbAlignInstall: %s", gbSelectDesc(select)); /* unload entries to free memory */ gbReleaseUnload(select->release); if (prevSelect != NULL) gbReleaseUnload(prevSelect->release); }
int chkAlignTables(char *db, struct gbSelect* select, struct sqlConnection* conn, struct metaDataTbls* metaDataTbls, struct dbLoadOptions *options) /* Verify all of the alignment-related. */ { int cnt = 0; if (gChromSizes == NULL) buildChromSizes(db); gbVerbEnter(1, "validating alignment tables: %s", gbSelectDesc(select)); if (select->release->srcDb & GB_GENBANK) { chkGenBankAlignTables(select, conn, metaDataTbls, options); cnt++; } if (select->release->srcDb & GB_REFSEQ) { chkRefSeqAlignTables(select, conn, metaDataTbls, options); cnt++; } gbVerbLeave(1, "validated alignment tables: %s", gbSelectDesc(select)); return cnt; }