static boolean isMgcFullLength()
/* Determine if the current RA entry is for an MGC.  Both the keyword field
 * and the source /clone field must identify the entry as MGC. */
{
/* Keywords end in `.' and are separated by "; ", so MGC has to be matched
 * as a complete keyword.  The clone field can look like:
 *     /clone="MGC:9349 IMAGE:3846611" */
static regex_t keyRe, cloRe;
static boolean first = TRUE;
if (first)
    {
    /* reComp is only invoked on the first call; the patterns are reused */
    reComp(&keyRe, "(^|.* )MGC(\\.$|;.*)");
    reComp(&cloRe, "(^|.* )MGC:[0-9]+( .*|$)");
    first = FALSE;
    }

/* the keyword is checked first; the clone field is only looked up when the
 * keyword matched */
char *key = raFieldCurVal("key");
if ((key == NULL) || !reMatches(&keyRe, key))
    return FALSE;

char *clo = raFieldCurVal("clo");
return (clo != NULL) && reMatches(&cloRe, clo);
}
static void imageCloneUpdate(struct gbStatus* status, struct sqlConnection *conn) /* update image clone table */ { /* assumes image id is never removed; most like true, would only * be changed on a mistake */ if (status->stateChg & (GB_NEW|GB_META_CHG)) { unsigned imageId = imageCloneGBParse(raFieldCurVal("clo")); if (imageId != 0) { if (status->stateChg & GB_NEW) { imageCloneTblAdd(imageCloneTbl, imageId, status->acc, status->type, raDir); } else if (status->stateChg & GB_META_CHG) { unsigned oldImageId = imageCloneTblGetId(conn, status->acc); if (oldImageId == 0) imageCloneTblAdd(imageCloneTbl, imageId, status->acc, status->type, raDir); else if (imageId != oldImageId) imageCloneTblMod(imageCloneTbl, imageId, status->acc, raDir); } } } }
static boolean isMgcFullLength()
/* Determine if the current RA entry is for an MGC. */
{
/* n.b. this used to also check the /clone= field, which was needed when
 * non-full-length MGCs were being processed.  That is no longer necessary
 * now that the MGC project is over, so only the keyword is checked. */
char *keyword = raFieldCurVal("key");
return isMgcFullLengthKeyword(keyword);
}
static boolean isOrfeome()
/* Determine if the current RA entry is for an ORFeome, which is indicated
 * by a keyword containing "ORFeome collaboration". */
{
static regex_t keyRe;
static boolean first = TRUE;
if (first)
    {
    /* reComp is only invoked on the first call; the pattern is reused */
    reComp(&keyRe, "(^|.* )ORFeome collaboration(\\.$|;.*)");
    first = FALSE;
    }

char *key = raFieldCurVal("key");
if (key == NULL)
    return FALSE;  /* no keyword field on this entry */
return reMatches(&keyRe, key);
}
static void refLinkUpdate(struct sqlConnection *conn, struct gbStatus* status) /* Update the refLink table for the current entry */ { int geneId; char *gen = emptyForNull(raFieldCurVal("gen")); char *pro = emptyForNull(raFieldCurVal("pro")); gen = sqlEscapeString2(alloca(2*strlen(gen)+1), gen); pro = sqlEscapeString2(alloca(2*strlen(pro)+1), pro); /* can either have locus id (old locus link db) or gene id, or both, * in which case the geneId is used */ geneId = (raGeneId != 0) ? raGeneId : raLocusLinkId; if (status->stateChg & GB_NEW) sqlUpdaterAddRow(refLinkUpd, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u", gen, pro, raAcc, raProtAcc, raFieldCurId("gen"), raFieldCurId("pro"), geneId, raOmimId); else if (status->stateChg & GB_META_CHG) sqlUpdaterModRow(refLinkUpd, 1, "name='%s', product='%s', protAcc='%s', " "geneName=%u, prodName=%u, locusLinkId=%u, " "omimId=%u where mrnaAcc='%s'", gen, pro, raProtAcc, raFieldCurId("gen"), raFieldCurId("pro"), geneId, raOmimId, raAcc); }
static void updateMetaData(struct sqlConnection *conn, struct gbStatus* status,
                           struct gbStatusTbl* statusTbl, HGID faFileId,
                           HGID pepFaId)
/* Update the database tables for the current entry as directed by the
 * stateChg flags, then bring the in-memory status entry up to date. */
{
assert(status->stateChg & (GB_NEW|GB_META_CHG|GB_REBUILD_DERIVED));

/* MGC and ORFeome flags are only determined for native entries, and only
 * when the corresponding have* flag is set */
if (status->orgCat == GB_NATIVE)
    {
    if (haveMgc)
        status->isMgcFull = isMgcFullLength();
    if (haveOrfeome)
        status->isOrfeome = isOrfeome();
    }

/* drop the description when it is not being kept */
if (!keepDesc(status))
    raFieldClear("def");

/* sequence, cDNA info and image clone updates are only done for new
 * entries or metadata changes */
if (status->stateChg & (GB_NEW|GB_META_CHG))
    {
    seqUpdate(status, faFileId);  /* must be first to get status->gbSeqId */
    gbCdnaInfoUpdate(status, conn);
    imageCloneUpdate(status, conn);
    }

if (raMiscDiffs != NULL)
    gbMiscDiffUpdate(status, conn);
if (raWarn != NULL)
    gbWarnUpdate(status, conn);

/* RefSeq-specific tables, again only for new entries or metadata changes */
if ((gSrcDb == GB_REFSEQ) && (status->stateChg & (GB_NEW|GB_META_CHG)))
    {
    refSeqStatusUpdate(status);
    refSeqSummaryUpdate(conn, status);
    refLinkUpdate(conn, status);
    refSeqPepUpdate(conn, pepFaId);
    }

/* update in-memory gbStatus entry */
status->modDate = raModDate;

/* Save CDS for use by the alignments.  A CDS that fails to parse is only
 * warned about for protein-coding RefSeqs, where we expect better. */
if (!genbankCdsParse(raCds, &status->cds)
    && (gSrcDb == GB_REFSEQ) && gbIsProteinCodingRefSeq(status->acc))
    prWarn("%s: malformed RefSeq CDS: %s", status->acc, raCds);

/* geneName for refFlat; fall back to locus_tag when there is no gene name */
char *geneName = raFieldCurVal("gen");
if (geneName == NULL)
    geneName = raFieldCurVal("lot");
if (geneName != NULL)
    status->geneName = lmCloneString(statusTbl->accHash->lm, geneName);

/* mark as done so dups in other updates don't get added to the database */
status->metaDone = TRUE;
}
static boolean isOrfeome()
/* Determine if the current RA entry is for an ORFeome, based on the
 * entry's keyword field. */
{
char *keyword = raFieldCurVal("key");
return isOrfeomeKeyword(keyword);
}