static void checkXRefRefSeq(struct metaData* md)
/* Verify the cross-reference flags collected for a RefSeq accession and
 * report every inconsistency found through gbError. */
{
/* table-membership checks are made for every RefSeq passed in */
if (!md->inRefSeqStatus)
    gbError("%s: not in refSeqStatus table, and is RefSeq acc", md->acc);
if (!md->inRefLink)
    gbError("%s: not in refLink table, and is RefSeq acc ", md->acc);

/* the peptide checks below apply only to protein-coding RefSeqs */
if (!gbIsProteinCodingRefSeq(md->acc))
    return;

if (!md->hasProt)
    {
    gbError("%s: no peptide for RefSeq", md->acc);
    return;
    }

/* at most one peptide location problem is reported; gbSeq is tested first */
if (!md->protInSeq)
    gbError("%s: RefSeq peptide %s not in gbSeq table", md->acc, md->rlProtAcc);
else if (!md->protInExtFile)
    gbError("%s: RefSeq peptide %s not in gbExtFile table", md->acc, md->rlProtAcc);
}
static void loadRefLinkRow(struct metaDataTbls* metaDataTbls,
                           struct sqlConnection* conn, char** row)
/* Load one row of the refLink table into the metadata entry for its
 * accession, reporting problems through gbError.
 *   metaDataTbls - tables the entry is looked up in (via metaDataTblsGet)
 *   conn         - not used by this function
 *   row          - column values in the order listed below */
{
/* columns: mrnaAcc,name,product,protAcc,geneName,prodName,locusLinkId,omimId */
struct metaData* md;
int iRow = 0;
char *acc = row[iRow++];

/* only NM_ and NR_ accessions are accepted; other rows are reported and skipped */
if (!(startsWith("NM_", acc) || startsWith("NR_", acc)))
    {
    gbError("%s: non-NM_/NR_ mrnaAcc in refLink", acc);
    return;
    }

md = metaDataTblsGet(metaDataTbls, acc);
if (md->inRefLink)
    gbError("%s: occurs multiple times in the refLink table", md->acc);
md->inRefLink = TRUE;
safef(md->rlName, sizeof(md->rlName), "%s", row[iRow++]);
iRow++;  /* product column is not examined, just step over it */
/* always copy through "%s": the column value is data, never a format string */
safef(md->rlProtAcc, sizeof(md->rlProtAcc), "%s", row[iRow++]);

/* check if ids are valid (zero is allowed, so just parse) */
strToUnsigned(row[iRow++], md->acc, "refLink.geneName", NULL);
strToUnsigned(row[iRow++], md->acc, "refLink.prodName", NULL);
strToUnsigned(row[iRow++], md->acc, "refLink.locusLinkId", NULL);
strToUnsigned(row[iRow++], md->acc, "refLink.omimId", NULL);

/* a protein-coding RefSeq must name its peptide; register it when present */
if (gbIsProteinCodingRefSeq(md->acc))
    {
    if (strlen(md->rlProtAcc) == 0)
        gbError("%s: empty protein acc in refLink", acc);
    else
        {
        metaDataTblsAddProtAcc(metaDataTbls, md);
        md->hasProt = TRUE;
        }
    }
}
/* Example no. 3 */
static void updateMetaData(struct sqlConnection *conn, struct gbStatus* status,
                           struct gbStatusTbl* statusTbl, HGID faFileId,
                           HGID pepFaId)
/* Bring the database tables and the in-memory status entry up to date for
 * the current entry, as directed by the bits set in status->stateChg. */
{
assert(status->stateChg & (GB_NEW|GB_META_CHG|GB_REBUILD_DERIVED));

/* MGC and ORFeome flags are only evaluated for native entries */
if ((status->orgCat == GB_NATIVE) && haveMgc)
    status->isMgcFull = isMgcFullLength();
if ((status->orgCat == GB_NATIVE) && haveOrfeome)
    status->isOrfeome = isOrfeome();

/* drop the "def" field when the description is not being kept */
if (!keepDesc(status))
    raFieldClear("def");

/* these updates are made only for new or meta-changed entries */
if ((status->stateChg & (GB_NEW|GB_META_CHG)) != 0)
    {
    /* seqUpdate has to run first so that status->gbSeqId is available */
    seqUpdate(status, faFileId);
    gbCdnaInfoUpdate(status, conn);
    imageCloneUpdate(status, conn);
    }

/* misc diffs and warnings are updated whenever they are present */
if (raMiscDiffs != NULL)
    gbMiscDiffUpdate(status, conn);
if (raWarn != NULL)
    gbWarnUpdate(status, conn);

/* RefSeq-only updates, again limited to new or meta-changed entries */
if (gSrcDb == GB_REFSEQ)
    {
    if ((status->stateChg & (GB_NEW|GB_META_CHG)) != 0)
        {
        refSeqStatusUpdate(status);
        refSeqSummaryUpdate(conn, status);
        refLinkUpdate(conn, status);
        refSeqPepUpdate(conn, pepFaId);
        }
    }

/* record the modification date in the in-memory gbStatus entry */
status->modDate = raModDate;

/* parse and save the CDS for use by the alignments; a CDS that does not
 * parse is only warned about for protein-coding RefSeqs, where we expect
 * to be better */
if (!genbankCdsParse(raCds, &status->cds)
    && (gSrcDb == GB_REFSEQ) && gbIsProteinCodingRefSeq(status->acc))
    prWarn("%s: malformed RefSeq CDS: %s", status->acc, raCds);

/* gene name for refFlat: use the "gen" field, else fall back to the
 * locus_tag ("lot") field; leave status->geneName alone when neither is set */
char *refFlatName = raFieldCurVal("gen");
if (refFlatName == NULL)
    refFlatName = raFieldCurVal("lot");
if (refFlatName != NULL)
    status->geneName = lmCloneString(statusTbl->accHash->lm, refFlatName);

/* flag the entry as handled so dups in other updates don't get added to
 * the database */
status->metaDone = TRUE;
}