static void loadSeqCDnaRow(struct metaDataTbls* metaDataTbls,
                           struct extFileTbl* extFileTbl,
                           boolean checkExtSeqRecs,
                           char* gbdbMapToCurrent,
                           struct sqlConnection* conn, char **row)
/* Record one cDNA row of the seq table in the metadata entry for its
 * accession.  A repeated accession is reported.  If the accession was
 * already seen in gbCdnaInfo, the id, type, srcDb and sizes of the row are
 * cross-checked.  The row's extFile entry is then verified.  conn is not
 * used here. */
{
struct seqFields seqRow;
struct metaData* entry;

parseGbSeqRow(row, &seqRow);
entry = metaDataTblsGet(metaDataTbls, seqRow.acc);
if (entry->inSeq)
    gbError("%s: acc occurs multiple times in the seq table", seqRow.acc);
entry->inSeq = TRUE;
entry->seqSize = seqRow.size;

/* cross-checks are only possible once the gbCdnaInfo row has been loaded */
if (entry->inGbCdnaInfo)
    {
    if (entry->gbCdnaInfoId != seqRow.id)
        gbError("%s: gbSeq.id (%d) not same gbCdnaInfo.id (%d)", seqRow.acc, seqRow.id, entry->gbCdnaInfoId);
    if (entry->gbCdnaInfoType != seqRow.type)
        gbError("%s: gbSeq.type (%s) not same as gbCdnaInfo.type (%s)", seqRow.acc,
                gbFmtSelect(seqRow.type), gbFmtSelect(entry->gbCdnaInfoType));
    /* srcDb must be one of the flags accumulated for this accession */
    if (!(seqRow.srcDb & entry->typeFlags))
        gbError("%s: gbSeq.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", seqRow.acc,
                gbFmtSelect(seqRow.srcDb), gbFmtSelect(entry->typeFlags));
    /* the record in the file must be larger than the sequence itself */
    if (seqRow.file_size <= entry->seqSize)
        gbError("%s: gbSeq.size >= gbSeq.file_size", seqRow.acc);
    }

if (verifySeqExtFile(&seqRow, extFileTbl, checkExtSeqRecs, gbdbMapToCurrent))
    entry->inExtFile = TRUE;
}
static void loadSeqPepRow(struct metaDataTbls* metaDataTbls,
                          struct extFileTbl* extFileTbl,
                          boolean checkExtSeqRecs,
                          char* gbdbMapToCurrent,
                          struct sqlConnection* conn, char **row)
/* Record one peptide row of the seq table.  The metadata entry is looked up
 * by the peptide accession; a row with no entry is silently skipped.
 * Otherwise a repeated accession is reported and the row's extFile entry is
 * verified.  conn is not used here. */
{
struct seqFields seqRow;
struct metaData* entry;

parseGbSeqRow(row, &seqRow);
entry = metaDataTblsGetByPep(metaDataTbls, seqRow.acc);
if (entry == NULL)
    {
#if 0 //FIXME: disabled due to known and harmless bug
    gbError("%s: peptide in gbSeq not found in refLink", seqRow.acc);
#endif
    return;
    }

if (entry->protInSeq)
    gbError("%s: acc occurs multiple times in the seq table", seqRow.acc);
entry->protInSeq = TRUE;
if (verifySeqExtFile(&seqRow, extFileTbl, checkExtSeqRecs, gbdbMapToCurrent))
    entry->protInExtFile = TRUE;
}
static void chkGbStatusGbEntry(struct gbSelect* select, struct gbEntry* entry,
                               struct metaData* md)
/* Compare the gbStatus fields held in md with the gbIndex entry: there must
 * be an aligned object for the gbStatus version, a processed entry with the
 * same version and modification date, and the same number of alignments.
 * select is not used here. */
{
struct gbProcessed* proc;
/* the aligned object is looked up by the version recorded in gbStatus */
struct gbAligned* alignedVer = gbEntryFindAlignedVer(entry, md->gbsVersion);

if (alignedVer == NULL)
    {
    gbError("%s.%d: no genbank gbIndex aligned object for gbStatus",
            md->acc, md->gbsVersion);
    return;
    }

/* walk the processed list until both date and version match */
for (proc = entry->processed; proc != NULL; proc = proc->next)
    {
    if ((proc->modDate == md->gbsModDate) && (proc->version == md->gbsVersion))
        break;
    }
if (proc == NULL)
    gbError("%s: no gbIndex processed entry for version %d, moddate %s, update %s",
            md->acc, md->gbsVersion, gbFormatDate(md->gbsModDate),
            alignedVer->update->name);

if (md->gbsNumAligns != alignedVer->numAligns)
    gbError("%s.%d: genbank index number of alignments (%d) does not match gbStatus (%d)",
            md->acc, md->gbsVersion, alignedVer->numAligns, md->gbsNumAligns);
}
static boolean verifySeqExtFile(struct seqFields *seq,
                                struct extFileTbl* extFileTbl,
                                boolean checkExtSeqRecs,
                                char* gbdbMapToCurrent)
/* Check a seq table row against its gbExtFile entry and the file on disk.
 * Returns FALSE if the row's extFile id is not in the extFile table, and
 * TRUE otherwise, even when problems were reported.  An unreadable or
 * wrongly sized file is reported once per path; later rows using that path
 * skip the disk checks.  If checkExtSeqRecs is set and nothing was wrong,
 * the fasta record itself is checked as well. */
{
#if 0 /* FIXME: for disabled code below */
int mapLen = (gbdbMapToCurrent != NULL) ? strlen(gbdbMapToCurrent) : 0;
#endif
char* path;
boolean isBad = FALSE;
struct extFile* ext = getExtFile(seq->acc, extFileTbl, seq->gbExtFile);

if (ext == NULL)
    return FALSE; /* without an extFile entry there is nothing to compare */

/* the record must lie entirely inside the file size recorded in the table */
if (ext->size < (seq->file_offset+seq->file_size))
    {
    gbError("%s: gbSeq.file_offset+gbSeq.file_size > gbExtFile.size", seq->acc);
    isBad = TRUE;
    }

/* map path to local directory if requested */
path = ext->path;
#if 0 /* FIXME: load stores full path when redirected */
if ((gbdbMapToCurrent != NULL) && startsWith(gbdbMapToCurrent, path)
    && (path[mapLen] == '/'))
    path += mapLen+1;
#endif

/* a path that was already reported is not examined again */
if (alreadyReported(missingExtFiles, path))
    return TRUE;

if (access(path, R_OK) >= 0)
    {
    /* readable, so the size on disk must agree with the table */
    if (fileSize(path) != ext->size)
        {
        gbError("%s: disk file size (%lld) does not match ext.size (%lld): %s",
                seq->acc, (long long)fileSize(path), (long long)ext->size, path);
        flagReported(missingExtFiles, path);
        isBad = TRUE;
        }
    }
else
    {
    gbError("%s: extFile does not exist or is not readable: %s",
            seq->acc, path);
    flagReported(missingExtFiles, path);
    isBad = TRUE;
    }

/* the per-record check is only run on rows with no problem so far */
if (checkExtSeqRecs && !isBad)
    checkExtRecord(seq, path);
return TRUE;
}
/* Example #5 (0) */
static void checkGbCdnaInfoStrKeys(struct sqlConnection* conn)
/* Verify that the ids appear valid for all of the unique string tables
 * referenced by the gbCdnaInfo table.  This counts the rows of a join of
 * gbCdnaInfo with all of the string tables on their ids.  If that count
 * doesn't match the number of rows in gbCdnaInfo, some of the ids are wrong.
 */
{
static char *joinSql =
    "NOSQLINJ SELECT count(*) FROM "
    "gbCdnaInfo,author,cds,cell,description,development,geneName,"
    "keyword,library,mrnaClone,organism,productName,sex,"
    "source,tissue "
    "WHERE gbCdnaInfo.author=author.id AND gbCdnaInfo.cds=cds.id "
    "AND gbCdnaInfo.cell=cell.id AND gbCdnaInfo.description=description.id "
    "AND gbCdnaInfo.development=development.id AND gbCdnaInfo.geneName=geneName.id "
    "AND gbCdnaInfo.keyword=keyword.id AND gbCdnaInfo.library=library.id "
    "AND gbCdnaInfo.mrnaClone=mrnaClone.id AND gbCdnaInfo.organism=organism.id "
    "AND gbCdnaInfo.productName=productName.id AND gbCdnaInfo.sex=sex.id "
    "AND gbCdnaInfo.source=source.id AND gbCdnaInfo.tissue=tissue.id";
unsigned numJoinRows = sqlQuickNum(conn, joinSql);
unsigned numTotalRows = sqlQuickNum(conn, "NOSQLINJ SELECT count(*) FROM gbCdnaInfo");

/* a row drops out of the join when any one of its ids has no match */
if (numJoinRows != numTotalRows)
    gbError("number of rows in gbCdnaInfo join with string tables (%u) "
            "does not match total in table (%u), something is wrong",
            numJoinRows, numTotalRows);
}
static void checkXRef(struct metaData* md)
/* Cross-check which tables an accession was found in.  Nothing is checked
 * unless the accession is flagged as being in gbAlign; for those, a missing
 * gbCdnaInfo, seq, gbExtFile, gbStatus or gbIndex entry is reported, and the
 * RefSeq tables are checked according to whether it is a RefSeq. */
{
if (md->inGbAlign)
    {
    if (!md->inGbCdnaInfo)
        gbError("%s: not in gbCdnaInfo table, referenced in %s", md->acc,
                getTablesDesc(md));
    if (!md->inSeq)
        gbError("%s: not in seq table, referenced in %s", md->acc,
                getTablesDesc(md));
    if (!md->inExtFile)
        gbError("%s: not in gbExtFile table, referenced in %s", md->acc,
                getTablesDesc(md));

    if ((md->typeFlags & GB_REFSEQ) == 0)
        {
        /* non-RefSeq accessions must not appear in the RefSeq tables */
        if (md->inRefSeqStatus)
            gbError("%s: in refSeqStatus table, and not RefSeq acc", md->acc);
        if (md->inRefLink)
            gbError("%s: in refLink table, and not RefSeq acc", md->acc);
        }
    else
        {
        checkXRefRefSeq(md);
        }

    if (!md->inGbStatus)
        gbError("%s: not in gbStatus table, referenced in %s", md->acc,
                getTablesDesc(md));
    if (!md->inGbIndex)
        gbError("%s: not in gbIndex, referenced in %s", md->acc,
                getTablesDesc(md));
    }
}
off_t strToOffset(char* str, char* acc, char* useMsg)
/* Convert a decimal string to an off_t.  A string with no digits or with
 * trailing characters is reported through gbError, with acc and useMsg
 * included in the message.  The result of strtoull is returned either way. */
{
char* end = NULL;
off_t offset = strtoull(str, &end, 10);

/* valid only if something was consumed and the whole string was used */
if ((end == str) || (*end != '\0'))
    gbError("%s: invalid offset \"%s\": %s ", acc, str, useMsg);
return offset;
}
static void checkXRefRefSeq(struct metaData* md)
/* Additional cross-checks for a RefSeq accession: it must be in both the
 * refSeqStatus and refLink tables, and a protein-coding RefSeq must have a
 * peptide that is in the gbSeq and gbExtFile tables.  Only the first
 * peptide problem found is reported. */
{
if (!md->inRefSeqStatus)
    gbError("%s: not in refSeqStatus table, and is RefSeq acc", md->acc);
if (!md->inRefLink)
    gbError("%s: not in refLink table, and is RefSeq acc ", md->acc);

if (!gbIsProteinCodingRefSeq(md->acc))
    return;  /* no peptide to check */

if (!md->hasProt)
    gbError("%s: no peptide for RefSeq", md->acc);
else if (!md->protInSeq)
    gbError("%s: RefSeq peptide %s not in gbSeq table", md->acc, md->rlProtAcc);
else if (!md->protInExtFile)
    gbError("%s: RefSeq peptide %s not in gbExtFile table", md->acc, md->rlProtAcc);
}
/* Example #9 (0) */
static void chkGenePred(struct genePred* gene, char *geneName, unsigned iRow,
                        char* database, char* table,
                        struct metaDataTbls* metaDataTbls, unsigned typeFlags)
/* Validate a genePred of a refSeq to genome alignment against the metadata.
 * Also count the number of alignments, and check the geneName, if available.
 * geneName may be NULL, in which case the name is not checked.  A failure of
 * genePredCheck is counted in errorCnt. */
{
char desc[512];
unsigned chromSize = getChromSize(database, gene->chrom);
struct metaData* md = metaDataTblsFind(metaDataTbls, gene->name);

if (gbVerbose >= 3)
    gbVerbMsg(3, "chkGenePred %s:%u %s %s",  table, iRow,
              gene->name, gene->chrom);
/* description of this row, used as the prefix of every message below */
safef(desc, sizeof(desc), "gene %s.%s:%u %s %s", database, table,
      iRow, gene->name, gene->chrom);

/* basic sanity checks */
if (genePredCheck(desc, stderr, chromSize, gene))
    errorCnt++;

/* check if in mrna table */
if (md == NULL)
    gbError("%s: %s is not in mrna table", desc, gene->name);
else
    {
    if (typeFlags != md->typeFlags)
        gbError("%s: alignment of %s type %s doesn't match expected %s",
                desc, gene->name, gbFmtSelect(md->typeFlags),
                gbFmtSelect(typeFlags));
    md->numAligns++;

    /* check gene name; a missing refLink name is compared as empty */
    if (geneName != NULL)
        {
        char* rlName = (md->rlName == NULL) ? "" : md->rlName;
        if (!sameString(geneName, rlName))
            gbError("%s: %s geneName \"%s\" does not match refLink name \"%s\"",
                    desc, gene->name, geneName, rlName);
        }
    }
}
static void loadRefLinkRow(struct metaDataTbls* metaDataTbls,
                           struct sqlConnection* conn, char** row)
/* Load a row of the refLink table into the metadata entry for its mRNA
 * accession.  Rows whose accession doesn't start with NM_ or NR_ are
 * reported and skipped.  A repeated accession is reported.  For a
 * protein-coding RefSeq, an empty protein accession is reported; otherwise
 * the protein accession is registered and the entry is flagged as having a
 * protein.  conn is not used here. */
{
/* columns: mrnaAcc,name,product,protAcc,geneName,prodName,locusLinkId,omimId */
struct metaData* md;
int iRow = 0;
char *acc = row[iRow++];

if (!(startsWith("NM_", acc) || startsWith("NR_", acc)))
    {
    gbError("%s: non-NM_/NR_ mrnaAcc in refLink", acc);
    return;
    }

md = metaDataTblsGet(metaDataTbls, acc);
if (md->inRefLink)
    gbError("%s: occurs multiple times in the refLink table", md->acc);
md->inRefLink = TRUE;
safef(md->rlName, sizeof(md->rlName), "%s", row[iRow++]);
iRow++;  /* the product column is not checked */
/* copy through "%s" so that table data is never used as a format string */
safef(md->rlProtAcc, sizeof(md->rlProtAcc), "%s", row[iRow++]);

/* check if ids are valid (zero is allowed, so just parse) */
strToUnsigned(row[iRow++], md->acc, "refLink.geneName", NULL);
strToUnsigned(row[iRow++], md->acc, "refLink.prodName", NULL);
strToUnsigned(row[iRow++], md->acc, "refLink.locusLinkId", NULL);
strToUnsigned(row[iRow++], md->acc, "refLink.omimId", NULL);

if (gbIsProteinCodingRefSeq(md->acc))
    {
    if (strlen(md->rlProtAcc) == 0)
        gbError("%s: empty protein acc in refLink", acc);
    else
        {
        metaDataTblsAddProtAcc(metaDataTbls, md);
        md->hasProt = TRUE;
        }
    }
}
static void loadRefSeqStatusRow(struct metaDataTbls* metaDataTbls,
                                struct sqlConnection* conn,
                                char** row)
/* Load a row of the refSeqStatus table (columns: mrnaAcc,status).  A
 * repeated accession is reported, as is a status that is not listed in
 * validRefSeqStatus.  conn is not used here. */
{
int idx = 0;
struct metaData* md = metaDataTblsGet(metaDataTbls, row[0]);
char* status = row[1];

if (md->inRefSeqStatus)
    gbError("%s: occurs multiple times in the refSeqStatus table", md->acc);
md->inRefSeqStatus = TRUE;

/* scan the NULL-terminated list, stopping at the first matching status */
while ((validRefSeqStatus[idx] != NULL)
       && !sameString(status, validRefSeqStatus[idx]))
    idx++;
if (validRefSeqStatus[idx] == NULL)
    gbError("%s: invalid refSeqStatus.status", md->acc);
}
static void checkExtRecord(struct seqFields *seq,
                           char *extPath)
/* Check the external file record for a sequence (slow). Assumes
 * that bounds have been sanity checked for the file.  Reads file_size bytes
 * at file_offset from extPath and verifies that they start a fasta record
 * whose name is acc.version and whose sequence length is seq->size. */
{
/* read range into buffer */
FILE *fh = mustOpen(extPath, "r");
char *faBuf;
char accVer[GB_ACC_BUFSZ];
struct dnaSeq *dnaSeq;
if (fseeko(fh, seq->file_offset, SEEK_SET) < 0)
    {
    gbError("%s: can't seek %s", seq->acc, extPath);
    carefulClose(&fh);
    return;  /* file is closed, so there is nothing left to read from */
    }
/* one extra byte so the record can be used as a zero-terminated string */
faBuf = needMem(seq->file_size+1);
mustRead(fh, faBuf, seq->file_size);
faBuf[seq->file_size] = '\0';
carefulClose(&fh);

/* verify contents */
if (faBuf[0] != '>')
    {
    gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s",
            seq->acc, (long long)seq->file_offset, extPath);
    /* NOTE(review): faBuf came from needMem; confirm free() is the matching
     * release call */
    free(faBuf);
    return;
    }
/* NOTE(review): faBuf is not freed after this point; presumably
 * faFromMemText takes ownership of it and freeDnaSeq releases it - confirm */
dnaSeq = faFromMemText(faBuf);
safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version);

if (!sameString(dnaSeq->name, accVer))
    gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s",
            seq->acc, dnaSeq->name, accVer, extPath);
if (dnaSeq->size != seq->size)
    gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s",
            seq->acc, dnaSeq->size, seq->size, extPath);
freeDnaSeq(&dnaSeq);
}
void checkEst(struct gbRelease* mrnaRelease,
              struct gbEntry* entry,
              struct gbSelect* prevSelect)
/* Check an EST, check for type change and orgCat change for
 * any of genomes in use */
{
struct gbEntry* mrnaEntry = gbReleaseFindEntry(mrnaRelease, entry->acc);
if (mrnaEntry != NULL)
    {
    /* type changed, output in format for ignore.idx */
    if (mrnaEntry->processed->modDate > entry->processed->modDate)
        gbError("%s\t%s\t%s\t%s changes type EST to mRNA",
                mrnaEntry->acc, gbFormatDate(entry->processed->modDate),
                gbSrcDbName(mrnaRelease->srcDb),
                gbFormatDate(mrnaEntry->processed->modDate));
    else
        gbError("%s\t%s\t%s\t%s changes type mRNA to EST",
                mrnaEntry->acc, gbFormatDate(mrnaEntry->processed->modDate),
                gbSrcDbName(mrnaRelease->srcDb),
                gbFormatDate(entry->processed->modDate));
    }
checkOrgCat(entry, prevSelect);
}
/* Example #14 (0) */
static void chkAlignCount(struct metaData* md, struct metaDataTbls* metaDataTbls,
                          char* dbTableDesc, unsigned typeFlags)
/* check alignment counts or gene counts for an accession if it matches the
 * type, also reset the counts. */
{
if (typeFlags == md->typeFlags)
    {
    if (md->numAligns != md->gbsNumAligns)
        gbError("%s: number of alignments found in %s (%d) does not match "
                "expected (%d) from gbStatus",
                md->acc, dbTableDesc, md->numAligns, md->gbsNumAligns);
    }
md->numAligns = 0; /* reset counts */
}
/* Example #15 (0) */
static void chkGenePredTable(struct gbSelect* select,
                             struct sqlConnection* conn,
                             char* table, boolean isRefFlat, 
                             struct metaDataTbls* metaDataTbls,
                             unsigned typeFlags)
/* Validate a genePred table by passing it to chkGenePredRows, with
 * isRefFlat and typeFlags handed through.  A table that doesn't exist is
 * reported instead.  Nothing is returned. */
{
gbVerbEnter(3, "chkGenePredTable %s", table);
if (sqlTableExists(conn, table))
    {
    chkGenePredRows(select, conn, table, isRefFlat, metaDataTbls, typeFlags);
    }
else
    {
    gbError("no genePred table %s.%s", select->release->genome->database,
            table);
    }
gbVerbLeave(3, "chkGenePredTable %s", table);
}
unsigned strToUnsigned(char* str, char* acc, char* useMsg,
                       boolean* gotError)
/* Convert a decimal string to an unsigned.  A string with no digits or with
 * trailing characters is reported through gbError, with acc and useMsg
 * included in the message.  If gotError is not NULL, it is set to TRUE when
 * the string was invalid and FALSE when it was valid.  The result of
 * strtoul is returned either way. */
{
char* end = NULL;
unsigned value = strtoul(str, &end, 10);
/* invalid if nothing was consumed or the string was not fully used */
boolean failed = ((end == str) || (*end != '\0')) ? TRUE : FALSE;

if (failed)
    gbError("%s: invalid unsigned \"%s\": %s ", acc, str, useMsg);
if (gotError != NULL)
    *gotError = failed;
return value;
}
static void loadGbCdnaInfoRow(struct metaDataTbls* metaDataTbls,
                              struct sqlConnection* conn, char** row)
/* Load one row from the gbCdnaInfo table into the metadata entry for its
 * accession, reporting invalid column values.  A repeated accession is
 * reported and the rest of the row is ignored.  conn is not used here. */
{
struct metaData* md;
int numNonZero, iRow = 0;
char *acc, *dir;
boolean gotError, isOk;

/* columns, in the order read below: acc,id,version,moddate,type,direction,
 * followed by the string id columns up to index 19 */
acc = row[iRow++];
md = metaDataTblsGet(metaDataTbls, acc);
if (md->inGbCdnaInfo)
    {
    gbError("%s: acc occurs multiple times in the mrna table", acc);
    return;
    }
md->inGbCdnaInfo = TRUE;
md->gbCdnaInfoId = strToUnsigned(row[iRow++], acc, "gbCdnaInfo.id", NULL);
/* accession goes first, then the column description */
md->gbCdnaInfoVersion = strToUnsigned(row[iRow++], acc, "gbCdnaInfo.version", &gotError);
if (!gotError && (md->gbCdnaInfoVersion <= 0))
     gbError("%s: gbCdnaInfo.version invalid: \"%d\"", acc, md->gbCdnaInfoVersion);
isOk = TRUE;
md->gbCdnaInfoModdate = gbParseChkDate(row[iRow++], &isOk);
if (!isOk)
    gbError("%s: invalid gbCdnaInfo.moddate value: \"%s\"", acc, row[iRow-1]);
md->gbCdnaInfoType = gbParseType(row[iRow++]);
md->typeFlags |= md->gbCdnaInfoType;

dir = row[iRow++];
/* must be exactly one of '0', '5' or '3'.  The length test also rejects an
 * empty string, whose terminator strchr would otherwise match. */
if ((strlen(dir) != 1) || (strchr("053", *dir) == NULL))
    gbError("%s: invalid gbCdnaInfo.direction value: \"%s\"", acc, dir);

/* Make sure that at least a few of the id fields have data  */
numNonZero = 0;
while (iRow < 20)
    {
    int id = strToUnsigned(row[iRow++], md->acc, "gbCdnaInfo.?", NULL);
    if (id > 0)
        numNonZero++;
    /* remember if we have a description (column index 16) */
    if (iRow-1 == 16)
        md->haveDesc = (id != 0);
    }
if (numNonZero == 0)
    gbError("%s: none of gbCdnaInfo string ids have non-zero values", acc);
else if (numNonZero < 4)
    gbError("%s: only %d of gbCdnaInfo string ids have non-zero values",
            acc, numNonZero);
}
void chkMetaDataGbEntry(struct metaDataTbls* metaDataTbls,
                        struct gbSelect* select, struct gbEntry* entry)
/* Check metadata against a gbEntry object.  The metadata entry for the
 * accession is flagged as being in gbIndex, and its alignment, native,
 * type and excluded fields are set from the entry.  If the accession is in
 * gbStatus, an excluded entry is reported; otherwise the gbStatus fields
 * are checked against the entry. */
{
struct metaData* md = metaDataTblsGet(metaDataTbls, entry->acc);
md->inGbIndex = TRUE;
md->inGbAlign = (entry->aligned != NULL);
md->isNative = (entry->orgCat == GB_NATIVE);  /* FIXME: dup field */
md->typeFlags |= ((entry->orgCat == GB_NATIVE) ? GB_NATIVE : GB_XENO);
/* excluded when the entry's category is not among those selected */
md->excluded = ((entry->orgCat & select->orgCats) == 0);

if (md->inGbStatus)
    {
    if (md->excluded)
        /* accession first, then the category name, as the format expects */
        gbError("%s: excluded (%s) entry should not be in gbStatus table",
                entry->acc, gbOrgCatName(entry->orgCat));
    else
        chkGbStatusGbEntry(select, entry, md);
    }
}
void checkProcOrgCat(struct gbEntry* entry, struct gbProcessed* proc0, char *org0,
                     struct gbProcessed* proc, struct slTime** reported)
/* Report an organism change between the processed entry proc and the
 * reference processed entry proc0, whose preferred organism name is org0.
 * Each modDate is reported once: the date is added to the reported list
 * when the message is written, and skipped if it is already there. */
{
char* org = gbGenomePreferedOrgName(proc->organism);

/* Names come from a static table, so pointers can be compared.  NULL is
 * returned for an organism we don't know about, so a change between NULL
 * and non-NULL also counts as a change. */
if (org == org0)
    return;
if (slTimeHave(*reported, proc->modDate))
    return;

gbError("%s\t%s\t%s\t%s changes organism \"%s\" to \"%s\"",
        entry->acc, 
        gbFormatDate(proc->modDate),
        gbSrcDbName(entry->processed->update->release->srcDb),
        gbFormatDate(proc0->modDate),
        proc->organism,
        proc0->organism);
slSafeAddHead(reported, slTimeNew(proc->modDate));
}
static struct extFile* getExtFile(char *acc, struct extFileTbl* extFileTbl,
                                  HGID seqExtFile)
/* Look up an extFile by id.  Returns the extFile, or NULL if the id is not
 * in the table.  A missing id is reported only the first time it is seen. */
{
char idKey[32];
struct extFile* found = extFileTblFindById(extFileTbl, seqExtFile);

if (found != NULL)
    return found;

/* the id in string form is the key for remembering what was reported */
safef(idKey, sizeof(idKey), "%d", seqExtFile);
if (!alreadyReported(missingExtFileIds, idKey))
    {
    gbError("%s: gbSeq.gbExtFile (%d) not in gbExtFile table", acc,
            seqExtFile);
    flagReported(missingExtFileIds, idKey);
    }
return found;
}
/* Example #21 (0) */
static void chkPsl(struct psl* psl, unsigned iRow, char* database,
                   char* table, struct metaDataTbls* metaDataTbls,
                   unsigned typeFlags)
/* Validate one PSL row of an mrna/est to genome alignment against the
 * metadata.  The target name and size are compared with the chromosome, and
 * the query is compared with its metadata entry.  For a query that is in
 * the seq table, the alignment count of its entry is incremented.  A
 * failure of pslCheck is counted in errorCnt. */
{
unsigned tChromSize = getChromSize(database, psl->tName);
struct metaData* qMd = metaDataTblsFind(metaDataTbls, psl->qName);
char rowDesc[128];

if (gbVerbose >= 3)
    gbVerbMsg(3, "chkPsl %s:%d %s %s",  table, iRow, psl->qName, psl->tName);

/* description of this row, used as the prefix of every message below */
safef(rowDesc, sizeof(rowDesc), "psl %s.%s row %u", database, table, iRow);

/* a size of zero is treated as an unknown chromosome */
if (chromSizeIsUnknown(tChromSize))
    gbError("%s: tName not a valid chromosome: \"%s\"", rowDesc, psl->tName);
else if (psl->tSize != tChromSize)
    gbError("%s: tSize %u != chromosome %s size %u",
            rowDesc, psl->tSize, psl->tName, tChromSize);

if (qMd == NULL)
    {
    gbError("%s: qName not in mrna table as type %s: \"%s\"",
            rowDesc, gbFmtSelect(typeFlags & GB_TYPE_MASK), psl->qName);
    }
else if (qMd->inSeq)
    {
    if (!qMd->inGbIndex)
        gbError("%s: qName not in gbIndex as type %s: \"%s\""
                " (Note: this can be caused by GenBank entries that were changed from type mRNA to other RNA types)", rowDesc,
                gbFmtSelect(typeFlags & GB_TYPE_MASK), psl->qName);
    else if (qMd->typeFlags != typeFlags)
        gbError("%s: alignment for %s type %s doesn't match expected %s",
                rowDesc, psl->qName, gbFmtSelect(qMd->typeFlags),
                gbFmtSelect(typeFlags));

    if (psl->qSize != qMd->seqSize)
        gbError("%s: qSize %u != %s size %u",
                rowDesc, psl->qSize, psl->qName, qMd->seqSize);
    qMd->numAligns++;
    }

/* validate consistency of PSL */
if (pslCheck(rowDesc, stderr, psl))
    errorCnt++;
}
static void loadGbStatusRow(struct metaDataTbls* metaDataTbls,
                            struct sqlConnection* conn, char** row,
                            unsigned descOrgCats)
/* Load a row of the gbStatus table into the metadata entry for its
 * accession.  A repeated accession and an invalid moddate are reported.  If
 * the accession was already seen in gbCdnaInfo, the two rows are
 * cross-checked, including whether a description is present, which is
 * expected only for the organism categories in descOrgCats.  conn is not
 * used here. */
{
struct metaData* md;
int col = 0;
boolean dateOk = TRUE;
boolean wantDesc;
HGID gbSeqId;

/* columns, in the order read below:
 * acc,version,modDate,type,srcDb,orgCat,gbSeq,numAligns */
md = metaDataTblsGet(metaDataTbls, row[col++]);
if (md->inGbStatus)
    gbError("%s: occurs multiple times in the gbStatus table", md->acc);
md->inGbStatus = TRUE;
md->gbsVersion = strToUnsigned(row[col++], md->acc, "gbStatus.version", NULL);

md->gbsModDate = gbParseChkDate(row[col++], &dateOk);
if (!dateOk)
    gbError("%s: invalid gbStatus.moddate value: \"%s\"", md->acc, row[col-1]);

md->gbsType = gbParseType(row[col++]);
md->gbsSrcDb = gbParseSrcDb(row[col++]);
md->gbsOrgCat = gbParseOrgCat(row[col++]);
gbSeqId = strToUnsigned(row[col++], md->acc, "gbStatus.gbSeq", NULL);
md->gbsNumAligns = strToUnsigned(row[col++], md->acc, "gbStatus.numAligns",
                                 NULL);

md->typeFlags |= md->gbsType;

/* the remaining checks all compare against the gbCdnaInfo row */
if (!md->inGbCdnaInfo)
    return;

if (md->gbCdnaInfoId != gbSeqId)
    gbError("%s: gbStatus.gbSeq (%d) not same gbCdnaInfo.id (%d)", md->acc, gbSeqId,
            md->gbCdnaInfoId);
if (md->gbCdnaInfoType != md->gbsType)
    gbError("%s: gbStatus.type (%s) not same as gbCdnaInfo.type (%s)", md->acc,
            gbFmtSelect(md->gbsType), gbFmtSelect(md->gbCdnaInfoType));
if ((md->typeFlags & GB_SRC_DB_MASK) != md->gbsSrcDb)
    gbError("%s: gbStatus.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", md->acc,
            gbFmtSelect(md->gbsSrcDb), gbFmtSelect(md->typeFlags));
if (md->gbCdnaInfoVersion != md->gbsVersion)
    gbError("%s: gbStatus.version (%d) not same gbCdnaInfo.version (%d)", md->acc,
            md->gbsVersion, md->gbCdnaInfoVersion);
if (md->gbCdnaInfoModdate != md->gbsModDate)
    gbError("%s: gbStatus.modDate (%s) not same gbCdnaInfo.moddate (%s)", md->acc,
            gbFormatDate(md->gbsModDate), gbFormatDate(md->gbCdnaInfoModdate));

/* a description must be present exactly when the category calls for one */
wantDesc = ((descOrgCats & md->gbsOrgCat) != 0);
if (wantDesc && !md->haveDesc)
    gbError("%s: should have gbCdnaInfo.description: %s", md->acc,
            gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb));
else if (!wantDesc && md->haveDesc)
    gbError("%s: should not have gbCdnaInfo.description: %s", md->acc,
            gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb));
}
/* Example #23 (0) */
static void chkPslTable(struct gbSelect* select, struct sqlConnection* conn,
                        char* rootTable, char* chrom,
                        struct metaDataTbls* metaDataTbls,
                        unsigned typeFlags)
/* Validate every row of a PSL table of mrna/est to genome alignments
 * against the metadata, which also counts the alignments of each mrna.
 * chrom is NULL for a table that is not chromosome-specific; such a table
 * must exist (only a warning is printed in test mode), while a missing
 * chromosome-specific table is silently accepted.  If select has an
 * accession prefix, only rows whose qName starts with it are read. */
{
struct hTableInfo* info;
char tableName[64];
char prefixWhere[64];
char sql[512];
char *db;
unsigned rowNum;
unsigned binCols;
struct sqlResult *result;
char **cols;

/* The chrom table name has to be built explicitly: there is an mrna table
 * which is not psl, so looking up mrna as a root name with a chrom that
 * doesn't exist would return the mrna table instead of null. */
if (chrom == NULL)
    safef(tableName, sizeof(tableName), "%s", rootTable);
else
    safef(tableName, sizeof(tableName), "%s_%s", chrom, rootTable);

gbVerbEnter(3, "chkPslTable %s", tableName);

db = select->release->genome->database;
info = hFindTableInfo(db, chrom, tableName);
if (info != NULL)
    {
    /* skip the leading bin column when the table has one */
    binCols = (info->hasBin) ? 1 : 0;
    // FIXME: might be better as sqlDyString
    prefixWhere[0] = '\0';
    if (select->accPrefix != NULL)
        sqlSafefFrag(prefixWhere, sizeof(prefixWhere), " WHERE qName LIKE '%s%%'",
              select->accPrefix);
    sqlSafef(sql, sizeof(sql), "SELECT * FROM %s%-s", tableName, prefixWhere);
    result = sqlGetResult(conn, sql);
    for (rowNum = 0; (cols = sqlNextRow(result)) != NULL; rowNum++)
        {
        struct psl* psl = pslLoad(cols+binCols);
        chkPsl(psl, rowNum, select->release->genome->database, tableName,
               metaDataTbls, typeFlags);
        pslFree(&psl);
        }
    sqlFreeResult(&result);
    }
else if (chrom == NULL)
    {
    /* the all-chromosome table is required */
    if (testMode)
        fprintf(stderr, "Warning: no psl table %s.%s\n",
                select->release->genome->database, tableName);
    else
        gbError("no psl table %s.%s", select->release->genome->database,
                tableName);
    }
gbVerbLeave(3, "chkPslTable %s", tableName);
}