Ejemplo n.º 1
0
static boolean isMgcFullLength()
/* Report whether the current RA entry is for an MGC.  Both the keyword
 * field and the clone field must match the MGC patterns below. */
{
/* Keywords end in `.' and are separated by "; ", so the keyword pattern
 * accepts MGC followed either by the final `.' or by a `;'.
 * A clone value can look like: /clone="MGC:9349 IMAGE:3846611" */
static regex_t mgcKeywordRe, mgcCloneRe;
static boolean compiled = FALSE;
if (!compiled)
    {
    /* patterns are compiled once, on the first call */
    reComp(&mgcKeywordRe, "(^|.* )MGC(\\.$|;.*)");
    reComp(&mgcCloneRe, "(^|.* )MGC:[0-9]+( .*|$)");
    compiled = TRUE;
    }

/* a missing or non-matching keyword field rules the entry out */
char *keywords = raFieldCurVal("key");
if ((keywords == NULL) || !reMatches(&mgcKeywordRe, keywords))
    return FALSE;

/* the clone field must be present and match as well */
char *clone = raFieldCurVal("clo");
if ((clone == NULL) || !reMatches(&mgcCloneRe, clone))
    return FALSE;

return TRUE;
}
Ejemplo n.º 2
0
static void imageCloneUpdate(struct gbStatus* status, struct sqlConnection *conn)
/* Update the image clone table for the current entry: add a row for a new
 * entry; for a metadata change, add or modify the row as needed. */
{
/* Removal of an image id is not handled here; it is assumed that this
 * essentially never happens, other than to correct a mistake. */
if (!(status->stateChg & (GB_NEW|GB_META_CHG)))
    return;

unsigned newImageId = imageCloneGBParse(raFieldCurVal("clo"));
if (newImageId == 0)
    return;  /* nothing to record when the parsed id is zero */

if (status->stateChg & GB_NEW)
    {
    imageCloneTblAdd(imageCloneTbl, newImageId, status->acc, status->type,
                     raDir);
    }
else if (status->stateChg & GB_META_CHG)
    {
    /* compare against the id returned for this accession; a zero result
     * leads to an add, a different non-zero id leads to a modify */
    unsigned curImageId = imageCloneTblGetId(conn, status->acc);
    if (curImageId == 0)
        {
        imageCloneTblAdd(imageCloneTbl, newImageId, status->acc,
                         status->type, raDir);
        }
    else if (curImageId != newImageId)
        {
        imageCloneTblMod(imageCloneTbl, newImageId, status->acc, raDir);
        }
    }
}
Ejemplo n.º 3
0
static boolean isMgcFullLength()
/* Report whether the current RA entry is for an MGC, judged from the
 * keyword field only. */
{
/* n.b. this used to check the /clone= field as well, which was needed while
 * non-full-length MGCs were being processed.  With the MGC project over,
 * only the keyword is checked. */
char *keywords = raFieldCurVal("key");
return isMgcFullLengthKeyword(keywords);
}
Ejemplo n.º 4
0
static boolean isOrfeome()
/* Report whether the current RA entry is for an ORFeome */
{
/* An entry qualifies when its keyword field contains
 * "ORFeome collaboration" as a keyword. */
static regex_t orfeomeRe;
static boolean compiled = FALSE;
if (!compiled)
    {
    /* pattern is compiled once, on the first call */
    reComp(&orfeomeRe, "(^|.* )ORFeome collaboration(\\.$|;.*)");
    compiled = TRUE;
    }

char *keywords = raFieldCurVal("key");
if (keywords == NULL)
    return FALSE;  /* no keyword field, so no match */
return reMatches(&orfeomeRe, keywords) ? TRUE : FALSE;
}
Ejemplo n.º 5
0
static void refLinkUpdate(struct sqlConnection *conn, struct gbStatus* status)
/* Update the refLink table for the current entry.  A new entry (GB_NEW)
 * gets a row added through refLinkUpd; otherwise a metadata change
 * (GB_META_CHG) modifies the row selected by mrnaAcc.  Nothing is done when
 * neither flag is set.  conn is not used by this function. */
{
char *gen = emptyForNull(raFieldCurVal("gen"));
char *pro = emptyForNull(raFieldCurVal("pro"));

/* escape both values into stack buffers sized 2*len+1 */
gen = sqlEscapeString2(alloca(2*strlen(gen)+1), gen);
pro = sqlEscapeString2(alloca(2*strlen(pro)+1), pro);

/* can either have locus id (old locus link db) or gene id, or both,
 * in which case the gene id is used.  Declared unsigned so the argument
 * type agrees with the %u conversions in the format strings below. */
unsigned geneId = (raGeneId != 0) ? raGeneId : raLocusLinkId;

if (status->stateChg & GB_NEW)
    sqlUpdaterAddRow(refLinkUpd, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u",
                     gen, pro, raAcc, raProtAcc, raFieldCurId("gen"),
                     raFieldCurId("pro"), geneId, raOmimId);
else if (status->stateChg & GB_META_CHG)
    sqlUpdaterModRow(refLinkUpd, 1, "name='%s', product='%s', protAcc='%s', "
                     "geneName=%u, prodName=%u, locusLinkId=%u, "
                     "omimId=%u where mrnaAcc='%s'",
                     gen, pro, raProtAcc, raFieldCurId("gen"),
                     raFieldCurId("pro"), geneId, raOmimId, raAcc);
}
Ejemplo n.º 6
0
static void updateMetaData(struct sqlConnection *conn, struct gbStatus* status,
                           struct gbStatusTbl* statusTbl, HGID faFileId,
                           HGID pepFaId)
/* Update the database tables for the current entry based on the stateChg
 * flags, and refresh fields of the in-memory status entry (isMgcFull,
 * isOrfeome, modDate, cds, geneName, metaDone).
 *   conn      - connection passed on to the per-table update helpers
 *   status    - entry being processed; read and modified in place
 *   statusTbl - its accHash->lm is used to allocate the geneName copy
 *   faFileId  - passed to seqUpdate()
 *   pepFaId   - passed to refSeqPepUpdate() */
{
/* at least one of these state-change flags must be set on entry */
assert(status->stateChg & (GB_NEW|GB_META_CHG|GB_REBUILD_DERIVED));

/* check for MGC, ORFeome; only done for native entries, and only for the
 * checks enabled by haveMgc / haveOrfeome */
if (status->orgCat == GB_NATIVE)
    {
    if (haveMgc)
        status->isMgcFull = isMgcFullLength();
    if (haveOrfeome)
        status->isOrfeome = isOrfeome();
    }

/* clear description if we are not keeping it */
if (!keepDesc(status))
    raFieldClear("def");

/* most database changes are only done for GB_EXT_CHG
 * NOTE(review): the tests below use GB_NEW|GB_META_CHG and GB_EXT_CHG does
 * not appear in this function -- this comment may be stale; confirm. */

if (status->stateChg & (GB_NEW|GB_META_CHG))
    {
    seqUpdate(status, faFileId);  /* must be first to get status->gbSeqId */
    gbCdnaInfoUpdate(status, conn);
    imageCloneUpdate(status, conn);
    }
/* misc diffs and warnings are updated whenever present, without a
 * stateChg test */
if (raMiscDiffs != NULL)
    gbMiscDiffUpdate(status, conn);
if (raWarn != NULL)
    gbWarnUpdate(status, conn);
/* RefSeq-specific tables, for new or metadata-changed entries only */
if ((gSrcDb == GB_REFSEQ) && (status->stateChg & (GB_NEW|GB_META_CHG)))
    {
    refSeqStatusUpdate(status);
    refSeqSummaryUpdate(conn, status);
    refLinkUpdate(conn, status);
    refSeqPepUpdate(conn, pepFaId);
    }

/* update in-memory gbStatus entry */
status->modDate = raModDate;

/* save CDS for use by the alignments; the parse result goes into
 * status->cds */
if (!genbankCdsParse(raCds, &status->cds))
    {
    /* not valid CDS, only warn if RefSeq, where we expect to be better */
    if ((gSrcDb == GB_REFSEQ) && gbIsProteinCodingRefSeq(status->acc))
        prWarn("%s: malformed RefSeq CDS: %s", status->acc, raCds);
    }

/* geneName for refFlat, if not available, try locus_tag ("lot" field).
 * status->geneName is left unchanged when neither field has a value. */
char *geneName = raFieldCurVal("gen");
if (geneName == NULL)
    geneName = raFieldCurVal("lot");
if (geneName != NULL)
    status->geneName = lmCloneString(statusTbl->accHash->lm, geneName);

/* mark as done so dups in other updates don't get added to the database */
status->metaDone = TRUE;
}
Ejemplo n.º 7
0
static boolean isOrfeome()
/* Report whether the current RA entry is for an ORFeome, judged from the
 * value of its "key" field. */
{
char *keywords = raFieldCurVal("key");
return isOrfeomeKeyword(keywords);
}