static void loadSeqCDnaRow(struct metaDataTbls* metaDataTbls,
                           struct extFileTbl* extFileTbl,
                           boolean checkExtSeqRecs, char* gbdbMapToCurrent,
                           struct sqlConnection* conn, char **row)
/* Load one seq-table row belonging to a cDNA.  The row is parsed, recorded
 * in the accession's metadata and, when gbCdnaInfo data has already been
 * seen for the accession, cross-checked against it.  The matching extFile
 * entry is verified last.  conn is not used by this function. */
{
    struct seqFields seqRec;
    struct metaData* meta;

    parseGbSeqRow(row, &seqRec);
    meta = metaDataTblsGet(metaDataTbls, seqRec.acc);

    /* each accession may appear only once in the seq table */
    if (meta->inSeq) {
        gbError("%s: acc occurs multiple times in the seq table", seqRec.acc);
    }
    meta->inSeq = TRUE;
    meta->seqSize = seqRec.size;

    if (meta->inGbCdnaInfo) {
        /* id, type and source database must agree with gbCdnaInfo */
        if (seqRec.id != meta->gbCdnaInfoId) {
            gbError("%s: gbSeq.id (%d) not same gbCdnaInfo.id (%d)",
                    seqRec.acc, seqRec.id, meta->gbCdnaInfoId);
        }
        if (seqRec.type != meta->gbCdnaInfoType) {
            gbError("%s: gbSeq.type (%s) not same as gbCdnaInfo.type (%s)",
                    seqRec.acc, gbFmtSelect(seqRec.type),
                    gbFmtSelect(meta->gbCdnaInfoType));
        }
        if ((seqRec.srcDb & meta->typeFlags) == 0) {
            gbError("%s: gbSeq.srcDb (%s) not same gbCdnaInfo.srcDb (%s)",
                    seqRec.acc, gbFmtSelect(seqRec.srcDb),
                    gbFmtSelect(meta->typeFlags));
        }
        /* the stored record must be strictly larger than the sequence */
        if (meta->seqSize >= seqRec.file_size) {
            gbError("%s: gbSeq.size >= gbSeq.file_size", seqRec.acc);
        }
    }

    if (verifySeqExtFile(&seqRec, extFileTbl, checkExtSeqRecs, gbdbMapToCurrent)) {
        meta->inExtFile = TRUE;
    }
}
static void loadSeqPepRow(struct metaDataTbls* metaDataTbls,
                          struct extFileTbl* extFileTbl,
                          boolean checkExtSeqRecs, char* gbdbMapToCurrent,
                          struct sqlConnection* conn, char **row)
/* Load one seq-table row belonging to a peptide.  The metadata entry is
 * looked up by protein accession; a peptide without such an entry is
 * silently skipped (the corresponding error is currently disabled).
 * conn is not used by this function. */
{
    struct seqFields pepSeq;
    struct metaData* meta;

    parseGbSeqRow(row, &pepSeq);
    meta = metaDataTblsGetByPep(metaDataTbls, pepSeq.acc);

    if (meta == NULL) {
#if 0 //FIXME: disabled due to known and harmless bug
        gbError("%s: peptide in gbSeq not found in refLink", pepSeq.acc);
#endif
        return;
    }

    if (meta->protInSeq) {
        gbError("%s: acc occurs multiple times in the seq table", pepSeq.acc);
    }
    meta->protInSeq = TRUE;

    if (verifySeqExtFile(&pepSeq, extFileTbl, checkExtSeqRecs, gbdbMapToCurrent)) {
        meta->protInExtFile = TRUE;
    }
}
static void chkGbStatusGbEntry(struct gbSelect* select, struct gbEntry* entry, struct metaData* md) /* check entry fields against status fields */ { /* processed entry should be the one matching the aligned update */ struct gbAligned* aligned = gbEntryFindAlignedVer(entry, md->gbsVersion); if (aligned == NULL) gbError("%s.%d: no genbank gbIndex aligned object for gbStatus", md->acc, md->gbsVersion); else { /* search for a processed entry matching this data and version */ struct gbProcessed* processed = entry->processed; while ((processed != NULL) && !((processed->modDate == md->gbsModDate) && (processed->version == md->gbsVersion))) processed = processed->next; if (processed == NULL) gbError("%s: no gbIndex processed entry for version %d, moddate %s, update %s", md->acc, md->gbsVersion, gbFormatDate(md->gbsModDate), aligned->update->name); if (aligned->numAligns != md->gbsNumAligns) gbError("%s.%d: genbank index number of alignments (%d) does not match gbStatus (%d)", md->acc, md->gbsVersion, aligned->numAligns, md->gbsNumAligns); } }
static boolean verifySeqExtFile(struct seqFields *seq,
                                struct extFileTbl* extFileTbl,
                                boolean checkExtSeqRecs,
                                char* gbdbMapToCurrent)
/* Check a seq-table record against its extFile entry and the file on disk.
 * Returns FALSE when the extFile id is not in the extFile table, TRUE
 * otherwise (even if problems with the file were reported).  When
 * checkExtSeqRecs is set and nothing was found wrong, the fasta record
 * itself is also read and checked. */
{
#if 0 /* FIXME: for disabled code below */
    int mapLen = (gbdbMapToCurrent != NULL) ? strlen(gbdbMapToCurrent) : 0;
#endif
    boolean seqIsBad = FALSE;
    char* path;
    struct extFile* ext = getExtFile(seq->acc, extFileTbl, seq->gbExtFile);

    if (ext == NULL) {
        return FALSE;  /* all that can be checked */
    }

    /* the record must lie completely inside the external file */
    if ((seq->file_offset+seq->file_size) > ext->size) {
        gbError("%s: gbSeq.file_offset+gbSeq.file_size > gbExtFile.size", seq->acc);
        seqIsBad = TRUE;
    }

    /* map path to local directory if requested */
    path = ext->path;
#if 0 /* FIXME: load stores full path when redirected */
    if ((gbdbMapToCurrent != NULL) && startsWith(gbdbMapToCurrent, path)
        && (path[mapLen] == '/'))
        path += mapLen+1;
#endif

    /* A file that was already reported is neither reported nor tested again */
    if (alreadyReported(missingExtFiles, path)) {
        return TRUE;
    }

    if (access(path, R_OK) < 0) {
        gbError("%s: extFile does not exist or is not readable: %s",
                seq->acc, path);
        flagReported(missingExtFiles, path);
        seqIsBad = TRUE;
    } else if (fileSize(path) != ext->size) {
        gbError("%s: disk file size (%lld) does not match ext.size (%lld): %s",
                seq->acc, (long long)fileSize(path), (long long)ext->size, path);
        flagReported(missingExtFiles, path);
        seqIsBad = TRUE;
    }

    /* the slow record check only makes sense when everything else is sane */
    if (checkExtSeqRecs && !seqIsBad) {
        checkExtRecord(seq, path);
    }
    return TRUE;
}
static void checkGbCdnaInfoStrKeys(struct sqlConnection* conn)
/* Verify that the ids appear valid for all of the unique string tables
 * referenced by the gbCdnaInfo table.  This joins gbCdnaInfo with all of
 * the string tables on their ids and counts the result.  If that count
 * differs from the number of rows in gbCdnaInfo, some of the ids are
 * wrong and an error is reported. */
{
    static char *joinSql =
        "NOSQLINJ SELECT count(*) FROM "
        "gbCdnaInfo,author,cds,cell,description,development,geneName,"
        "keyword,library,mrnaClone,organism,productName,sex,"
        "source,tissue "
        "WHERE gbCdnaInfo.author=author.id AND gbCdnaInfo.cds=cds.id "
        "AND gbCdnaInfo.cell=cell.id AND gbCdnaInfo.description=description.id "
        "AND gbCdnaInfo.development=development.id AND gbCdnaInfo.geneName=geneName.id "
        "AND gbCdnaInfo.keyword=keyword.id AND gbCdnaInfo.library=library.id "
        "AND gbCdnaInfo.mrnaClone=mrnaClone.id AND gbCdnaInfo.organism=organism.id "
        "AND gbCdnaInfo.productName=productName.id AND gbCdnaInfo.sex=sex.id "
        "AND gbCdnaInfo.source=source.id AND gbCdnaInfo.tissue=tissue.id";
    unsigned numJoinRows = sqlQuickNum(conn, joinSql);
    unsigned numTotalRows = sqlQuickNum(conn, "NOSQLINJ SELECT count(*) FROM gbCdnaInfo");

    if (numJoinRows != numTotalRows) {
        /* message previously read "does (%u) does not match" */
        gbError("number of rows in gbCdnaInfo join with string tables (%u) "
                "does not match total in table (%u), something is wrong",
                numJoinRows, numTotalRows);
    }
}
static void checkXRef(struct metaData* md) /* check sanity of collected metadata */ { if (!md->inGbAlign) return; /* can't check anything else */ if (!md->inGbCdnaInfo) gbError("%s: not in gbCdnaInfo table, referenced in %s", md->acc, getTablesDesc(md)); if (!md->inSeq) gbError("%s: not in seq table, referenced in %s", md->acc, getTablesDesc(md)); if (!md->inExtFile) gbError("%s: not in gbExtFile table, referenced in %s", md->acc, getTablesDesc(md)); if (md->typeFlags & GB_REFSEQ) { checkXRefRefSeq(md); } else { if (md->inRefSeqStatus) gbError("%s: in refSeqStatus table, and not RefSeq acc", md->acc); if (md->inRefLink) gbError("%s: in refLink table, and not RefSeq acc", md->acc); } if (!md->inGbStatus) gbError("%s: not in gbStatus table, referenced in %s", md->acc, getTablesDesc(md)); if (!md->inGbIndex) gbError("%s: not in gbIndex, referenced in %s", md->acc, getTablesDesc(md)); }
off_t strToOffset(char* str, char* acc, char* useMsg)
/* Parse a decimal string into an off_t.
 *   str    - text to parse; the whole string must be consumed
 *   acc    - accession used as the prefix of the error message
 *   useMsg - description of the field, appended to the error message
 * An error is reported through gbError when the string is empty, has
 * trailing characters, or holds a value that does not fit in a
 * non-negative off_t (this includes negative input and values on which
 * strtoull overflows).  The converted value is returned in every case, as
 * before; callers see the problem through the reported error. */
{
    char* stop;
    unsigned long long parsed = strtoull(str, &stop, 10);
    off_t num = (off_t)parsed;

    /* strtoull negates "-N" and saturates on overflow; both cases end up
     * either negative or changed once squeezed into the signed off_t, so
     * the round-trip comparison catches them without needing errno */
    if ((stop == str) || (*stop != '\0')
        || (num < 0) || ((unsigned long long)num != parsed)) {
        gbError("%s: invalid offset \"%s\": %s ", acc, str, useMsg);
    }
    return num;
}
static void checkXRefRefSeq(struct metaData* md)
/* Additional cross-checks for a RefSeq accession: it must be in the
 * refSeqStatus and refLink tables and, when protein coding, its peptide
 * must be present in the gbSeq and gbExtFile tables. */
{
    if (!md->inRefSeqStatus) {
        gbError("%s: not in refSeqStatus table, and is RefSeq acc", md->acc);
    }
    if (!md->inRefLink) {
        gbError("%s: not in refLink table, and is RefSeq acc ", md->acc);
    }

    if (!gbIsProteinCodingRefSeq(md->acc)) {
        return;  /* no peptide expected */
    }

    if (!md->hasProt) {
        gbError("%s: no peptide for RefSeq", md->acc);
        return;
    }

    /* only the first missing table is reported */
    if (!md->protInSeq) {
        gbError("%s: RefSeq peptide %s not in gbSeq table", md->acc,
                md->rlProtAcc);
    } else if (!md->protInExtFile) {
        gbError("%s: RefSeq peptide %s not in gbExtFile table", md->acc,
                md->rlProtAcc);
    }
}
static void chkGenePred(struct genePred* gene, char *geneName, unsigned iRow, char* database, char* table, struct metaDataTbls* metaDataTbls, unsigned typeFlags) /* Validate a genePred of a refSeq to genome alignment against the metadata. * Also count the number of alignments, and check the geneName, if available */ { char desc[512]; unsigned chromSize = getChromSize(database, gene->chrom); struct metaData* md = metaDataTblsFind(metaDataTbls, gene->name); if (gbVerbose >= 3) gbVerbMsg(3, "chkGenePred %s:%d %s %s", table, iRow, gene->name, gene->chrom); safef(desc, sizeof(desc), "gene %s.%s:%u %s %s", database, table, iRow, gene->name, gene->chrom); /* basic sanity checks */ if (genePredCheck(desc, stderr, chromSize, gene)) errorCnt++; /* check if in mrna table */ if (md == NULL) gbError("%s: %s in not in mrna table", desc, gene->name); else { if (typeFlags != md->typeFlags) gbError("%s: alignment of %s type %s doesn't match expected %s", desc, gene->name, gbFmtSelect(md->typeFlags), gbFmtSelect(typeFlags)); md->numAligns++; } /* check gene name */ if ((md != NULL) && (geneName != NULL)) { char* rlName = (md->rlName == NULL) ? "" : md->rlName; if (!sameString(geneName, rlName)) gbError("%s: %s geneName \"%s\" does not match refLink name \"%s\"", desc, gene->name, geneName, rlName); } }
static void loadRefLinkRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row) /* load a row of the refLink table */ { /* columns: mrnaAcc,name,product,protAcc,geneName,prodName,locusLinkId,omimId */ struct metaData* md; int iRow = 0; char *acc = row[iRow++]; char *product; if (!(startsWith("NM_", acc) || startsWith("NR_", acc))) { gbError("%s: non-NM_/NR_ mrnaAcc in refLink", acc); return; } md = metaDataTblsGet(metaDataTbls, acc); if (md->inRefLink) gbError("%s: occurs multiple times in the refLink table", md->acc); md->inRefLink = TRUE; safef(md->rlName, sizeof(md->rlName), "%s", row[iRow++]); product = row[iRow++]; safef(md->rlProtAcc, sizeof(md->rlProtAcc), row[iRow++]); /* check if ids are valid (zero is allowed, so just parse) */ strToUnsigned(row[iRow++], md->acc, "refLink.geneName", NULL); strToUnsigned(row[iRow++], md->acc, "refLink.prodName", NULL); strToUnsigned(row[iRow++], md->acc, "refLink.locusLinkId", NULL); strToUnsigned(row[iRow++], md->acc, "refLink.omimId", NULL); if (gbIsProteinCodingRefSeq(md->acc)) { if (strlen(md->rlProtAcc) == 0) gbError("%s: empty protein acc in refLink", acc); else { metaDataTblsAddProtAcc(metaDataTbls, md); md->hasProt = TRUE; } } }
static void loadRefSeqStatusRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row) /* load a row of the refSeqStatus table */ { int i; char* rssStatus; struct metaData* md; /* columns: mrnaAcc,status */ md = metaDataTblsGet(metaDataTbls, row[0]); if (md->inRefSeqStatus) gbError("%s: occurs multiple times in the refSeqStatus table", md->acc); md->inRefSeqStatus = TRUE; rssStatus = row[1]; for (i = 0; (validRefSeqStatus[i] != NULL) && !sameString(rssStatus, validRefSeqStatus[i]); i++) continue; if (validRefSeqStatus[i] == NULL) gbError("%s: invalid refSeqStatus.status", md->acc); }
static void checkExtRecord(struct seqFields *seq, char *extPath) /* Check the external file record for a sequence (slow). Assumes * that bounds have been sanity check for a file. */ { /* read range into buffer */ FILE *fh = mustOpen(extPath, "r"); char *faBuf; char accVer[GB_ACC_BUFSZ]; struct dnaSeq *dnaSeq; if (fseeko(fh, seq->file_offset, SEEK_SET) < 0) { gbError("%s: can't seek %s", seq->acc, extPath); carefulClose(&fh); } faBuf = needMem(seq->file_size+1); mustRead(fh, faBuf, seq->file_size); faBuf[seq->file_size] = '\0'; carefulClose(&fh); /* verify contents */ if (faBuf[0] != '>') { gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s", seq->acc, (long long)seq->file_offset, extPath); free(faBuf); return; } dnaSeq = faFromMemText(faBuf); safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version); if (!sameString(dnaSeq->name, accVer)) gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s", seq->acc, dnaSeq->name, accVer, extPath); if (dnaSeq->size != seq->size) gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s", seq->acc, dnaSeq->size, seq->size, extPath); freeDnaSeq(&dnaSeq); }
void checkEst(struct gbRelease* mrnaRelease, struct gbEntry* entry, struct gbSelect* prevSelect) /* Check an EST, check for type change and orgCat change for * any of genomes in use */ { struct gbEntry* mrnaEntry = gbReleaseFindEntry(mrnaRelease, entry->acc); if (mrnaEntry != NULL) { /* type changed, output in format for ignore.idx */ if (mrnaEntry->processed->modDate > entry->processed->modDate) gbError("%s\t%s\t%s\t%s changes type EST to mRNA", mrnaEntry->acc, gbFormatDate(entry->processed->modDate), gbSrcDbName(mrnaRelease->srcDb), gbFormatDate(mrnaEntry->processed->modDate)); else gbError("%s\t%s\t%s\t%s changes type mRNA to EST", mrnaEntry->acc, gbFormatDate(mrnaEntry->processed->modDate), gbSrcDbName(mrnaRelease->srcDb), gbFormatDate(entry->processed->modDate)); } checkOrgCat(entry, prevSelect); }
static void chkAlignCount(struct metaData* md,
                          struct metaDataTbls* metaDataTbls,
                          char* dbTableDesc, unsigned typeFlags)
/* For an accession whose type flags equal typeFlags, compare the number of
 * alignments (or genes) counted so far with the count from gbStatus.  The
 * running count is reset to zero whether or not the type matched.
 * metaDataTbls is not used by this function. */
{
    if ((typeFlags == md->typeFlags) && (md->numAligns != md->gbsNumAligns)) {
        gbError("%s: number of alignments found in %s (%d) does not match "
                "expected (%d) from gbStatus",
                md->acc, dbTableDesc, md->numAligns, md->gbsNumAligns);
    }

    /* reset counts */
    md->numAligns = 0;
}
static void chkGenePredTable(struct gbSelect* select,
                             struct sqlConnection* conn, char* table,
                             boolean isRefFlat,
                             struct metaDataTbls* metaDataTbls,
                             unsigned typeFlags)
/* Validate a genePred table.  A missing table is reported as an error;
 * otherwise every row is checked by chkGenePredRows, to which isRefFlat
 * is passed through.  Nothing is returned. */
{
    gbVerbEnter(3, "chkGenePredTable %s", table);

    if (sqlTableExists(conn, table)) {
        chkGenePredRows(select, conn, table, isRefFlat, metaDataTbls, typeFlags);
    } else {
        gbError("no genePred table %s.%s", select->release->genome->database,
                table);
    }

    gbVerbLeave(3, "chkGenePredTable %s", table);
}
unsigned strToUnsigned(char* str, char* acc, char* useMsg, boolean* gotError)
/* Parse a decimal string into an unsigned.
 *   str      - text to parse; the whole string must be consumed
 *   acc      - accession used as the prefix of the error message
 *   useMsg   - description of the field, appended to the error message
 *   gotError - if not NULL, set to TRUE when an error was reported and to
 *              FALSE otherwise
 * An error is reported through gbError when the string is empty, has
 * trailing characters, carries a minus sign, or holds a value too large
 * for an unsigned.  The (possibly truncated) value is returned in every
 * case, as before. */
{
    char* stop;
    unsigned long parsed = strtoul(str, &stop, 10);
    unsigned num = (unsigned)parsed;
    boolean invalid;

    /* strtoul silently negates "-N" and may return more bits than an
     * unsigned holds; neither is a valid unsigned field value.  Once the
     * whole string has been consumed as a number, any '-' in it can only
     * be the sign. */
    invalid = (stop == str) || (*stop != '\0')
              || ((unsigned long)num != parsed)
              || (strchr(str, '-') != NULL);

    if (invalid)
        gbError("%s: invalid unsigned \"%s\": %s ", acc, str, useMsg);
    if (gotError != NULL)
        *gotError = invalid ? TRUE : FALSE;
    return num;
}
static void loadGbCdnaInfoRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row) /* load one row from the gbCdnaInfo table */ { struct metaData* md; int len, numNonZero, iRow = 0; char *acc, *dir; boolean gotError, isOk; /* columns: acc,id,moddate,version,moddate,type */ acc = row[iRow++]; md = metaDataTblsGet(metaDataTbls, acc); if (md->inGbCdnaInfo) { gbError("%s: acc occurs multiple times in the mrna table", acc); return; } md->inGbCdnaInfo = TRUE; md->gbCdnaInfoId = strToUnsigned(row[iRow++], acc, "gbCdnaInfo.id", NULL); len = strlen(acc); md->gbCdnaInfoVersion = strToUnsigned(row[iRow++], "gbCdnaInfo.version", acc, &gotError); if (!gotError && (md->gbCdnaInfoVersion <= 0)) gbError("%s: gbCdnaInfo.version invalid: \"%d\"", acc, md->gbCdnaInfoVersion); isOk = TRUE; md->gbCdnaInfoModdate = gbParseChkDate(row[iRow++], &isOk); if (!isOk) gbError("%s: invalid gbCdnaInfo.moddate value: \"%s\"", acc, row[iRow-1]); md->gbCdnaInfoType = gbParseType(row[iRow++]); md->typeFlags |= md->gbCdnaInfoType; dir = row[iRow++]; if ((strlen(dir) > 1) || (strchr("053", *dir) == NULL)) gbError("%s: invalid gbCdnaInfo.direction value: \"%s\"", acc, dir); /* Make sure that at least a few of the id fields have data */ numNonZero = 0; while (iRow < 20) { int id = strToUnsigned(row[iRow++], md->acc, "gbCdnaInfo.?", NULL); if (id > 0) numNonZero++; /* remember if we have a description */ if (iRow-1 == 16) md->haveDesc = (id != 0); } if (numNonZero == 0) gbError("%s: none of gbCdnaInfo string ids have non-zero values", dir); else if (numNonZero < 4) gbError("%s: only %d of gbCdnaInfo string ids have non-zero values", dir, numNonZero); }
void chkMetaDataGbEntry(struct metaDataTbls* metaDataTbls, struct gbSelect* select, struct gbEntry* entry) /* Check metadata against a gbEntry object */ { struct metaData* md = metaDataTblsGet(metaDataTbls, entry->acc); md->inGbIndex = TRUE; md->inGbAlign = (entry->aligned != NULL); md->isNative = (entry->orgCat == GB_NATIVE); /* FIXME: dup field */ md->typeFlags |= ((entry->orgCat == GB_NATIVE) ? GB_NATIVE : GB_XENO); md->excluded = ((entry->orgCat & select->orgCats) == 0); if (md->inGbStatus) { if (md->excluded) gbError("%s: excluded (%s) entry should not be in gbStatus table", gbOrgCatName(entry->orgCat), entry->acc); else chkGbStatusGbEntry(select, entry, md); } }
void checkProcOrgCat(struct gbEntry* entry, struct gbProcessed* proc0, char *org0, struct gbProcessed* proc, struct slTime** reported) /* Check for organism category changing from a give processed entry * to the latest entry. Report error if not already reported */ { char* org = gbGenomePreferedOrgName(proc->organism); /* name in static table, so can compare ptrs. NULL is returned * for organism we don't know about. change from NULL to not * NULL also a orgCat change. */ if ((org != org0) && !slTimeHave(*reported, proc->modDate)) { gbError("%s\t%s\t%s\t%s changes organism \"%s\" to \"%s\"", entry->acc, gbFormatDate(proc->modDate), gbSrcDbName(entry->processed->update->release->srcDb), gbFormatDate(proc0->modDate), proc->organism, proc0->organism); slSafeAddHead(reported, slTimeNew(proc->modDate)); } }
static struct extFile* getExtFile(char *acc, struct extFileTbl* extFileTbl,
                                  HGID seqExtFile)
/* Look up an extFile entry by id.  Returns NULL when the id is not in the
 * table; that is reported as an error only the first time a given id is
 * found missing. */
{
    struct extFile* found = extFileTblFindById(extFileTbl, seqExtFile);
    char idText[32];

    if (found != NULL) {
        return found;
    }

    /* missing ids are remembered by their decimal text */
    safef(idText, sizeof(idText), "%d", seqExtFile);
    if (!alreadyReported(missingExtFileIds, idText)) {
        gbError("%s: gbSeq.gbExtFile (%d) not in gbExtFile table", acc,
                seqExtFile);
        flagReported(missingExtFileIds, idText);
    }
    return found;
}
static void chkPsl(struct psl* psl, unsigned iRow, char* database, char* table, struct metaDataTbls* metaDataTbls, unsigned typeFlags) /* Validate a PSL of a mrna/est to genome alignment against the metadata. * Also count the number of alignments of a mrna. */ { unsigned chromSize = getChromSize(database, psl->tName); struct metaData* md = metaDataTblsFind(metaDataTbls, psl->qName); char pslDesc[128]; if (gbVerbose >= 3) gbVerbMsg(3, "chkPsl %s:%d %s %s", table, iRow, psl->qName, psl->tName); safef(pslDesc, sizeof(pslDesc), "psl %s.%s row %u", database, table, iRow); /* check that we have sequence info and compare sizes sizes */ if (chromSize == 0) gbError("%s: tName not a valid chromosome: \"%s\"", pslDesc, psl->tName); else if (chromSize != psl->tSize) gbError("%s: tSize %u != chromosome %s size %u", pslDesc, psl->tSize, psl->tName, chromSize); if (md == NULL) gbError("%s: qName not in mrna table as type %s: \"%s\"", pslDesc, gbFmtSelect(typeFlags & GB_TYPE_MASK), psl->qName); else if (md->inSeq) { if (!md->inGbIndex) gbError("%s: qName not in gbIndex as type %s: \"%s\"" " (Note: this can be caused by GenBank entries that were changed from type mRNA to other RNA types)", pslDesc, gbFmtSelect(typeFlags & GB_TYPE_MASK), psl->qName); else { if (typeFlags != md->typeFlags) gbError("%s: alignment for %s type %s doesn't match expected %s", pslDesc, psl->qName, gbFmtSelect(md->typeFlags), gbFmtSelect(typeFlags)); } if (md->seqSize != psl->qSize) gbError("%s: qSize %u != %s size %u", pslDesc, psl->qSize, psl->qName, md->seqSize); md->numAligns++; } /* validate consistency of PSL */ if (pslCheck(pslDesc, stderr, psl)) errorCnt++; }
static void loadGbStatusRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row, unsigned descOrgCats) /* load a row of the gbStatus table */ { struct metaData* md; int iRow = 0; boolean isOk; HGID seqId; /* columns: acc,version,modDate,type,srcDb,gbSeq,numAligns */ md = metaDataTblsGet(metaDataTbls, row[iRow++]); if (md->inGbStatus) gbError("%s: occurs multiple times in the gbStatus table", md->acc); md->inGbStatus = TRUE; md->gbsVersion = strToUnsigned(row[iRow++], md->acc, "gbStatus.version", NULL); isOk = TRUE; md->gbsModDate = gbParseChkDate(row[iRow++], &isOk); if (!isOk) gbError("%s: invalid gbStatus.moddate value: \"%s\"", md->acc, row[iRow-1]); md->gbsType = gbParseType(row[iRow++]); md->gbsSrcDb = gbParseSrcDb(row[iRow++]); md->gbsOrgCat = gbParseOrgCat(row[iRow++]); seqId = strToUnsigned(row[iRow++], md->acc, "gbStatus.gbSeq", NULL); md->gbsNumAligns = strToUnsigned(row[iRow++], md->acc, "gbStatus.numAligns", NULL); md->typeFlags |= md->gbsType; if (md->inGbCdnaInfo) { if (seqId != md->gbCdnaInfoId) gbError("%s: gbStatus.gbSeq (%d) not same gbCdnaInfo.id (%d)", md->acc, seqId, md->gbCdnaInfoId); if (md->gbsType != md->gbCdnaInfoType) gbError("%s: gbStatus.type (%s) not same as gbCdnaInfo.type (%s)", md->acc, gbFmtSelect(md->gbsType), gbFmtSelect(md->gbCdnaInfoType)); if (md->gbsSrcDb != (md->typeFlags & GB_SRC_DB_MASK)) gbError("%s: gbStatus.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", md->acc, gbFmtSelect(md->gbsSrcDb), gbFmtSelect(md->typeFlags)); if (md->gbsVersion != md->gbCdnaInfoVersion) gbError("%s: gbStatus.version (%d) not same gbCdnaInfo.version (%d)", md->acc, md->gbsVersion, md->gbCdnaInfoVersion); if ((md->gbsModDate != md->gbCdnaInfoModdate)) gbError("%s: gbStatus.modDate (%s) not same gbCdnaInfo.moddate (%s)", md->acc, gbFormatDate(md->gbsModDate), gbFormatDate(md->gbCdnaInfoModdate)); /* verify either have or don't have a description */ if (descOrgCats & md->gbsOrgCat) { if (!md->haveDesc) gbError("%s: should have 
gbCdnaInfo.description: %s", md->acc, gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb)); } else { if (md->haveDesc) gbError("%s: should not have gbCdnaInfo.description: %s", md->acc, gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb)); } } }
static void chkPslTable(struct gbSelect* select, struct sqlConnection* conn, char* rootTable, char* chrom, struct metaDataTbls* metaDataTbls, unsigned typeFlags) /* Validate a PSL of a mrna/est to genome alignment against the metadata. If * not a chromosome-specific table, chrom should be null. Chromosome-specific * tables are not required to exist (for testing purposes). Also count the * number of alignments of a mrna. */ { struct hTableInfo* tableInfo; char table[64]; unsigned iRow = 0; unsigned rowOffset; char accWhere[64]; char query[512]; struct sqlResult *sr; char **row; /* need to specify an explicit chrom table, as there is an mrna table which is * not psl, so using mrna as a root name with a chrom that doesn't exist * returns the mrna instead of null */ if (chrom != NULL) safef(table, sizeof(table), "%s_%s", chrom, rootTable); else safef(table, sizeof(table), "%s", rootTable); gbVerbEnter(3, "chkPslTable %s", table); tableInfo = hFindTableInfo(select->release->genome->database, chrom, table); if (tableInfo == NULL) { /* If all table, require it */ if (chrom == NULL) { if (testMode) fprintf(stderr, "Warning: no psl table %s.%s\n", select->release->genome->database, table); else gbError("no psl table %s.%s", select->release->genome->database, table); } } else { rowOffset = (tableInfo->hasBin) ? 1 : 0; // FIXME: might be better as sqlDyString accWhere[0] = '\0'; if (select->accPrefix != NULL) sqlSafefFrag(accWhere, sizeof(accWhere), " WHERE qName LIKE '%s%%'", select->accPrefix); sqlSafef(query, sizeof(query), "SELECT * FROM %s%-s", table, accWhere); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct psl* psl = pslLoad(row+rowOffset); chkPsl(psl, iRow, select->release->genome->database, table, metaDataTbls, typeFlags); pslFree(&psl); iRow++; } sqlFreeResult(&sr); } gbVerbLeave(3, "chkPslTable %s", table); }