Beispiel #1
0
void refPepList(char *db,
                FILE* outFh)
/* List refseq protein sequences needing repair.  For every entry in the
 * broken-protein table's protAccHash, one tab-separated line is written to
 * outFh: database name, protein accession, and "repair" when the entry has
 * an mRNA accession or "drop" when it does not.  The repair and drop totals
 * are then reported with gbVerbMsg.  Nothing is written when
 * checkForRefLink() returns false for the connection. */
{
struct sqlConnection *conn = sqlConnect(db);
struct brokenRefPepTbl *brpTbl;
struct hashCookie cookie;
struct hashEl *hel;
struct extFileTbl* extFileTbl = NULL;

if (!checkForRefLink(conn))
    {
    sqlDisconnect(&conn);
    return;
    }

/* the extFile table is only needed while scanning for broken entries */
extFileTbl = extFileTblLoad(conn);
brpTbl = brokenRefPepTblNew(conn, NULL);
brokenRefPepGetSeqScan(conn, extFileTbl, brpTbl);
brokenRefPepGetMrnas(conn, brpTbl);
extFileTblFree(&extFileTbl);

cookie = hashFirst(brpTbl->protAccHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct brokenRefPep *brp = hel->val;
    fprintf(outFh, "%s\t%s\t%s\n", sqlGetDatabase(conn), brp->protAcc, (brp->mrnaAcc != NULL)? "repair" : "drop");
    }
gbVerbMsg(1, "%s: need to repair %d refseq protein gbExtFile entries",
          sqlGetDatabase(conn), brpTbl->numToRepair);
gbVerbMsg(1, "%s: need to drop %d refseq protein gbExtFile entries",
          sqlGetDatabase(conn), brpTbl->numToDrop);

/* release the table and the connection; previously both were leaked on
 * the normal return path */
brokenRefPepTblFree(&brpTbl);
sqlDisconnect(&conn);
}
Beispiel #2
0
void refPepRepair(char *db,
                  char *accFile,
                  boolean dryRun)
/* Fix dangling refPep gbSeq entries.  If accFile is not NULL, the list of
 * names loaded from it is handed to brokenRefPepTblNew(); otherwise NULL is
 * passed.  dryRun is forwarded to makeRepairs() and noted in the log
 * message.  Returns without doing anything when checkForRefLink() returns
 * false for the connection. */
{
struct sqlConnection *conn = sqlConnect(db);
struct brokenRefPepTbl *brpTbl;
struct extFileTbl* extFileTbl;
struct slName *accs = (accFile == NULL) ? NULL : slNameLoadReal(accFile);
if (!checkForRefLink(conn))
    {
    /* accs was already loaded above, so it must be freed here too */
    slFreeList(&accs);
    sqlDisconnect(&conn);
    return;
    }

gbVerbMsg(1, "%s: repairing refseq protein gbExtFile entries%s",
          sqlGetDatabase(conn), (dryRun? " (dry run)" : ""));

extFileTbl = extFileTblLoad(conn);
brpTbl = brokenRefPepTblNew(conn, accs);
brokenRefPepGetSeqScan(conn, extFileTbl, brpTbl);
brokenRefPepGetMrnas(conn, brpTbl);

fillInFastaOffsets(brpTbl, conn, extFileTbl);
if (brpTbl->numToRepair > 0)
    makeRepairs(brpTbl, conn, extFileTbl, dryRun);
else
    gbVerbMsg(1, "%s: no refseq proteins to repair", sqlGetDatabase(conn));

/* release everything acquired above */
brokenRefPepTblFree(&brpTbl);
extFileTblFree(&extFileTbl);
sqlDisconnect(&conn);
slFreeList(&accs);
}
void loadSeqData(struct metaDataTbls* metaDataTbls,
                 struct gbSelect* select, struct sqlConnection* conn,
                 boolean checkExtSeqRecs, char* gbdbMapToCurrent)
/* Load seq table data.  The gbCdnaInfo table should already be loaded, and
 * for refseq, refLink should have been loaded as well. */
{
struct extFileTbl* extTbl = NULL;

gbVerbMsg(2,  "load gbExtFile table data");
extTbl = extFileTblLoad(conn);

/* create the global tracking hashes if they have not been set up yet */
if (!missingExtFileIds)
    {
    missingExtFileIds = hashNew(16);
    missingExtFiles = hashNew(16);
    }

/* cDNA data is loaded for every release */
loadSeqCDnaData(metaDataTbls, select, conn, checkExtSeqRecs,
                gbdbMapToCurrent, extTbl);

/* peptide data is loaded only when the release source is RefSeq */
if (select->release->srcDb == GB_REFSEQ)
    {
    loadSeqPepData(metaDataTbls, conn, checkExtSeqRecs,
                   gbdbMapToCurrent, extTbl);
    }

extFileTblFree(&extTbl);
}
static HGID getExtFileId(struct sqlConnection *conn, char* relPath)
/* Get the extFile id for a file, prepending the gbdb root dir
 * (gGbdbGenBank) and a '/' when that root is non-empty.  The extFile table
 * is loaded into the extFiles global if it is still NULL.  Returns the
 * result of extFileTblGet() for the assembled path. */
{
char path[PATH_LEN];

/* Build the path with the bounds-checked safef; the earlier
 * strcpy/strcat sequence could write past the end of path when the root
 * plus relPath exceeded PATH_LEN.
 * NOTE(review): assumes the kent library's safef (common.h) is in scope,
 * which aborts rather than truncating on overflow -- confirm. */
if (gGbdbGenBank[0] != '\0')
    safef(path, sizeof(path), "%s/%s", gGbdbGenBank, relPath);
else
    safef(path, sizeof(path), "%s", relPath);

if (extFiles == NULL)
    {
    gbVerbMsg(4, "loading extFile table");
    extFiles = extFileTblLoad(conn);
    }
return extFileTblGet(extFiles, conn, path);
}