static void checkXRefRefSeq(struct metaData* md)
/* Sanity-check the cross-table metadata flags collected for a RefSeq
 * accession, reporting each inconsistency through gbError(). */
{
/* every RefSeq accession should have been seen in both tables */
if (!md->inRefSeqStatus)
    gbError("%s: not in refSeqStatus table, and is RefSeq acc", md->acc);
if (!md->inRefLink)
    gbError("%s: not in refLink table, and is RefSeq acc ", md->acc);

/* the peptide checks only apply to protein-coding RefSeqs */
if (!gbIsProteinCodingRefSeq(md->acc))
    return;

/* report only the first peptide problem found */
if (!md->hasProt)
    gbError("%s: no peptide for RefSeq", md->acc);
else if (!md->protInSeq)
    gbError("%s: RefSeq peptide %s not in gbSeq table", md->acc,
            md->rlProtAcc);
else if (!md->protInExtFile)
    gbError("%s: RefSeq peptide %s not in gbExtFile table", md->acc,
            md->rlProtAcc);
}
static void loadRefLinkRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row) /* load a row of the refLink table */ { /* columns: mrnaAcc,name,product,protAcc,geneName,prodName,locusLinkId,omimId */ struct metaData* md; int iRow = 0; char *acc = row[iRow++]; char *product; if (!(startsWith("NM_", acc) || startsWith("NR_", acc))) { gbError("%s: non-NM_/NR_ mrnaAcc in refLink", acc); return; } md = metaDataTblsGet(metaDataTbls, acc); if (md->inRefLink) gbError("%s: occurs multiple times in the refLink table", md->acc); md->inRefLink = TRUE; safef(md->rlName, sizeof(md->rlName), "%s", row[iRow++]); product = row[iRow++]; safef(md->rlProtAcc, sizeof(md->rlProtAcc), row[iRow++]); /* check if ids are valid (zero is allowed, so just parse) */ strToUnsigned(row[iRow++], md->acc, "refLink.geneName", NULL); strToUnsigned(row[iRow++], md->acc, "refLink.prodName", NULL); strToUnsigned(row[iRow++], md->acc, "refLink.locusLinkId", NULL); strToUnsigned(row[iRow++], md->acc, "refLink.omimId", NULL); if (gbIsProteinCodingRefSeq(md->acc)) { if (strlen(md->rlProtAcc) == 0) gbError("%s: empty protein acc in refLink", acc); else { metaDataTblsAddProtAcc(metaDataTbls, md); md->hasProt = TRUE; } } }
static void updateMetaData(struct sqlConnection *conn, struct gbStatus* status,
                           struct gbStatusTbl* statusTbl, HGID faFileId,
                           HGID pepFaId)
/* Update the database tables and the in-memory gbStatus entry for the
 * current entry, driven by the status->stateChg flags. */
{
assert(status->stateChg & (GB_NEW|GB_META_CHG|GB_REBUILD_DERIVED));

/* MGC and ORFeome flags are only evaluated for native entries */
if (status->orgCat == GB_NATIVE)
    {
    if (haveMgc)
        status->isMgcFull = isMgcFullLength();
    if (haveOrfeome)
        status->isOrfeome = isOrfeome();
    }

/* drop the description field when it is not being kept */
if (!keepDesc(status))
    raFieldClear("def");

/* these table updates only happen when GB_NEW or GB_META_CHG is set */
if (status->stateChg & (GB_NEW|GB_META_CHG))
    {
    seqUpdate(status, faFileId);  /* must be first to get status->gbSeqId */
    gbCdnaInfoUpdate(status, conn);
    imageCloneUpdate(status, conn);
    }

if (raMiscDiffs != NULL)
    gbMiscDiffUpdate(status, conn);
if (raWarn != NULL)
    gbWarnUpdate(status, conn);

/* RefSeq-specific tables, under the same GB_NEW/GB_META_CHG condition */
if (gSrcDb == GB_REFSEQ)
    {
    if (status->stateChg & (GB_NEW|GB_META_CHG))
        {
        refSeqStatusUpdate(status);
        refSeqSummaryUpdate(conn, status);
        refLinkUpdate(conn, status);
        refSeqPepUpdate(conn, pepFaId);
        }
    }

/* update in-memory gbStatus entry */
status->modDate = raModDate;

/* Save the CDS for use by the alignments.  A CDS that fails to parse is
 * only warned about for protein-coding RefSeqs, where better data is
 * expected. */
if (!genbankCdsParse(raCds, &status->cds)
    && (gSrcDb == GB_REFSEQ) && gbIsProteinCodingRefSeq(status->acc))
    prWarn("%s: malformed RefSeq CDS: %s", status->acc, raCds);

/* gene name for refFlat: use the "gen" field, falling back to "lot"
 * (locus_tag) when it is absent */
char *name = raFieldCurVal("gen");
if (name == NULL)
    name = raFieldCurVal("lot");
if (name != NULL)
    status->geneName = lmCloneString(statusTbl->accHash->lm, name);

/* mark as done so dups in other updates don't get added to the database */
status->metaDone = TRUE;
}