void refPepList(char *db, FILE* outFh) /* list of sequences needing repair */ { struct sqlConnection *conn = sqlConnect(db); struct brokenRefPepTbl *brpTbl; struct hashCookie cookie; struct hashEl *hel; struct extFileTbl* extFileTbl = NULL; if (!checkForRefLink(conn)) { sqlDisconnect(&conn); return; } extFileTbl = extFileTblLoad(conn); brpTbl = brokenRefPepTblNew(conn, NULL); brokenRefPepGetSeqScan(conn, extFileTbl, brpTbl); brokenRefPepGetMrnas(conn, brpTbl); extFileTblFree(&extFileTbl); cookie = hashFirst(brpTbl->protAccHash); while ((hel = hashNext(&cookie)) != NULL) { struct brokenRefPep *brp = hel->val; fprintf(outFh, "%s\t%s\t%s\n", sqlGetDatabase(conn), brp->protAcc, (brp->mrnaAcc != NULL)? "repair" : "drop"); } gbVerbMsg(1, "%s: need to repair %d refseq protein gbExtFile entries", sqlGetDatabase(conn), brpTbl->numToRepair); gbVerbMsg(1, "%s: need to drop %d refseq protein gbExtFile entries", sqlGetDatabase(conn), brpTbl->numToDrop); }
void refPepRepair(char *db, char *accFile, boolean dryRun)
/* Fix dangling refPep gbSeq entries.
 *
 * db      - name of the database to connect to.
 * accFile - NULL, or a file loaded with slNameLoadReal(); the resulting
 *           list is handed to brokenRefPepTblNew().
 *           NOTE(review): presumably this restricts the repair to the
 *           listed accessions - confirm against brokenRefPepTblNew().
 * dryRun  - forwarded to makeRepairs() and noted in the progress message.
 *
 * Returns without doing any repair work if checkForRefLink() rejects the
 * database.  Every resource acquired here is released on all return
 * paths. */
{
    struct sqlConnection *conn = sqlConnect(db);
    struct brokenRefPepTbl *brpTbl = NULL;
    struct extFileTbl *extFileTbl = NULL;
    struct slName *accs = (accFile == NULL) ? NULL : slNameLoadReal(accFile);

    if (!checkForRefLink(conn)) {
        /* accs was loaded before this check; free it here so the early
         * return does not leak the list. */
        slFreeList(&accs);
        sqlDisconnect(&conn);
        return;
    }

    gbVerbMsg(1, "%s: repairing refseq protein gbExtFile entries%s",
              sqlGetDatabase(conn), (dryRun ? " (dry run)" : ""));

    extFileTbl = extFileTblLoad(conn);
    brpTbl = brokenRefPepTblNew(conn, accs);
    brokenRefPepGetSeqScan(conn, extFileTbl, brpTbl);
    brokenRefPepGetMrnas(conn, brpTbl);
    fillInFastaOffsets(brpTbl, conn, extFileTbl);

    if (brpTbl->numToRepair > 0) {
        makeRepairs(brpTbl, conn, extFileTbl, dryRun);
    } else {
        gbVerbMsg(1, "%s: no refseq proteins to repair", sqlGetDatabase(conn));
    }

    brokenRefPepTblFree(&brpTbl);
    extFileTblFree(&extFileTbl);
    sqlDisconnect(&conn);
    slFreeList(&accs);
}
void loadSeqData(struct metaDataTbls* metaDataTbls, struct gbSelect* select,
                 struct sqlConnection* conn, boolean checkExtSeqRecs,
                 char* gbdbMapToCurrent)
/* Load seq table data into metaDataTbls.  The gbCdnaInfo table should
 * already be loaded and, for refseq, refLink as well.
 *
 * The gbExtFile table is loaded for the duration of the call and freed
 * before returning.  cDNA data is always loaded; peptide data is loaded
 * additionally when the selected release's source database is GB_REFSEQ. */
{
    struct extFileTbl *extTbl = NULL;

    gbVerbMsg(2, "load gbExtFile table data");
    extTbl = extFileTblLoad(conn);

    /* One-time creation of the shared hashes; both are created together,
     * keyed off missingExtFileIds still being unset. */
    if (missingExtFileIds == NULL) {
        missingExtFileIds = hashNew(16);
        missingExtFiles = hashNew(16);
    }

    loadSeqCDnaData(metaDataTbls, select, conn, checkExtSeqRecs,
                    gbdbMapToCurrent, extTbl);

    if (select->release->srcDb == GB_REFSEQ) {
        loadSeqPepData(metaDataTbls, conn, checkExtSeqRecs,
                       gbdbMapToCurrent, extTbl);
    }

    extFileTblFree(&extTbl);
}
static HGID getExtFileId(struct sqlConnection *conn, char* relPath)
/* Get the extFile id for a file, prepending the gbdb root dir.
 *
 * conn    - connection used to load the extFile table on first use and
 *           passed on to extFileTblGet().
 * relPath - file path; when gGbdbGenBank is non-empty the looked-up path
 *           is gGbdbGenBank + "/" + relPath, otherwise relPath unchanged.
 *
 * Returns whatever extFileTblGet() yields for the assembled path.  The
 * shared extFiles table is loaded on the first call and reused after. */
{
    size_t rootLen = strlen(gGbdbGenBank);
    size_t relLen = strlen(relPath);
    /* Sized exactly for root + '/' + relPath + NUL.  The previous fixed
     * PATH_LEN buffer was filled with unchecked strcpy/strcat and could be
     * overrun by a long root or relative path. */
    char path[rootLen + relLen + 2];
    char *dst = path;

    if (rootLen > 0) {
        memcpy(dst, gGbdbGenBank, rootLen);
        dst += rootLen;
        *dst++ = '/';
    }
    memcpy(dst, relPath, relLen + 1);  /* +1 copies the terminating NUL */

    if (extFiles == NULL) {
        gbVerbMsg(4, "loading extFile table");
        extFiles = extFileTblLoad(conn);
    }
    return extFileTblGet(extFiles, conn, path);
}