Beispiel #1
0
static void processOi(struct gbSelect* select, struct estOrientInfo* oi)
/* Handle one orientation-info record read from an update OI file.  The
 * record is written to gOutOi when the release has an entry for its
 * accession and the record's version equals the entry's selected version.
 * At verbosity 3 or above, the decision is traced. */
{
char accBuf[GB_ACC_BUFSZ];
short ver = gbSplitAccVer(oi->name, accBuf);

/* the lookup yields NULL for ignored sequences */
struct gbEntry* found = gbReleaseFindEntry(select->release, accBuf);
int isSelected = (found != NULL) && (ver == found->selectVer);

if (isSelected)
    {
    if (!gInclVersion)
        strcpy(oi->name, accBuf);  /* name becomes the bare accession */
    estOrientInfoTabOut(oi, gOutOi);
    found->clientFlags = TRUE;  /* remember that this entry was output */
    }

/* everything below is tracing only */
if (gbVerbose < 3)
    return;

if (found == NULL)
    gbVerbPr(3, "no entry: %s.%d", accBuf, ver);
else if (found->selectVer <= 0)
    gbVerbPr(3, "not selected: %s.%d", accBuf, ver);
else if (!isSelected)
    gbVerbPr(3, "not version: %s.%d != %d", accBuf, ver, found->selectVer);
else
    gbVerbPr(3, "save: %s.%d", accBuf, ver);
}
Beispiel #2
0
struct gbEntry* getEntry(struct gbSelect* select, char* acc, char* refFile)
/* Look up the entry for an accession that was referenced in refFile.
 * Returns the entry when it exists and has a processed record for
 * select->update.  Returns NULL when the accession is missing but listed
 * in the release's ignore table, or when the entry exists but is not part
 * of the current update.  A missing accession that is not ignored is a
 * fatal error.  Tolerating ignored accessions allows an entry to be added
 * to the ignore list after it has been aligned (e.g. it still appears in a
 * PSL). */
{
struct gbEntry* found = gbReleaseFindEntry(select->release, acc);

if (found != NULL)
    {
    /* entry exists; make sure it belongs to this update */
    assert(select->update != NULL);
    if (gbEntryFindUpdateProcessed(found, select->update) != NULL)
        return found;
    assert(gbIgnoreFind(select->release->ignore, acc) != NULL);
    return NULL;  /* not in update, probably ignored */
    }

/* Not in the index.  Note that this does not check that a specific
 * acc/moddate pair is ignored; it only suppresses the error when the
 * accession is ignored for any moddate. */
if (gbIgnoreFind(select->release->ignore, acc) != NULL)
    return NULL;  /* ignored, assume the best */

errAbort("can't find accession \"%s\" in gbIndex, referenced in %s",
         acc, refFile);
return NULL;  /* only reached if errAbort were to return */
}
Beispiel #3
0
void copyRefSeqPepFa(struct gbUpdate* update,
                     char* outDir, char *gbFile)
/* Copy the subset of a RefSeq peptide fasta file that belongs to selected
 * genes.
 *
 * update - update whose release index is used to look up gene accessions
 * outDir - directory prefix for the output file
 * gbFile - name of the GenBank flat file; must end in ".gbff.Z".  The
 *          peptide fasta name is derived by replacing that suffix with
 *          ".fsa.Z".
 *
 * A record is copied when parsePepGeneAcc() yields an accession from its
 * '>' header line, the release has an entry for it, and that entry's
 * selectVer is greater than zero.  Aborts if gbFile has the wrong suffix,
 * if a constructed path does not fit in PATH_LEN, or if a write fails. */
{
struct gbRelease* release = update->release;
char faInPath[PATH_LEN];
char faOutPath[PATH_LEN];
struct lineFile* inLf;
boolean copying = FALSE;
FILE* outFh;
char* line;
int pathLen;

/* change the .gbff.Z suffix to .fsa.Z; bounded so an over-long name
 * aborts instead of overrunning the fixed-size buffer */
if (!endsWith(gbFile, ".gbff.Z"))
    errAbort("expected a file ending in .gbff.Z, got: %s", gbFile);
pathLen = snprintf(faInPath, sizeof(faInPath), "%.*s.fsa.Z",
                   (int)(strlen(gbFile) - strlen(".gbff.Z")), gbFile);
if ((pathLen < 0) || ((size_t)pathLen >= sizeof(faInPath)))
    errAbort("peptide fasta path too long, derived from: %s", gbFile);

/* output path is outDir/faInPath, same bounded construction */
pathLen = snprintf(faOutPath, sizeof(faOutPath), "%s/%s", outDir, faInPath);
if ((pathLen < 0) || ((size_t)pathLen >= sizeof(faOutPath)))
    errAbort("output path too long: %s/%s", outDir, faInPath);

verbose(1, "copying from %s\n", faInPath);

/* copy selected, don't bother with fa readers */
inLf = gzLineFileOpen(faInPath);
outFh = gbMustOpenOutput(faOutPath);

while (lineFileNext(inLf, &line, NULL))
    {
    if (line[0] == '>')
        {
        /* a header line starts a new record; decide whether to copy it
         * and the sequence lines that follow */
        char *geneAcc = parsePepGeneAcc(line);
        struct gbEntry* entry = NULL;
        if (geneAcc != NULL)
            entry = gbReleaseFindEntry(release, geneAcc);
        copying = ((entry != NULL) && (entry->selectVer > 0));
        /* geneAcc may be NULL; never hand a NULL pointer to %s */
        verbose(2, "acc for pep: %s: %s\n",
                ((geneAcc != NULL) ? geneAcc : "(null)"),
                (copying ? "yes" : "no"));
        }
    if (copying)
        {
        fputs(line, outFh);
        fputc('\n', outFh);
        if (ferror(outFh))
            errnoAbort("write failed: %s: ", faOutPath);
        }
    }

gbOutputRename(faOutPath, &outFh);
gzLineFileClose(&inLf);
}
boolean copyFastaRec(struct gbSelect* select, struct gbFa* inFa,
                     struct outFa* nativeFa, struct outFa* xenoFa)
/* Read the next fasta record from inFa and, if it is selected for
 * alignment, write it to nativeFa or xenoFa according to the entry's
 * organism category.  Returns FALSE at end of input, TRUE otherwise
 * (whether or not the record was written). */
{
char acc[GB_ACC_BUFSZ];
unsigned version;
struct gbEntry* entry;

if (!gbFaReadNext(inFa))
    return FALSE; /* EOF */

version = gbSplitAccVer(inFa->id, acc);
entry = gbReleaseFindEntry(select->release, acc);

/* no index entry for this accession */
if (entry == NULL)
    {
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, no entry", inFa->id);
    return TRUE;
    }

/* sequence shorter than the minimum is never output */
if (strlen(gbFaGetSeq(inFa)) < MIN_SEQ_SIZE)
    {
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, less than minimum sequence size", inFa->id);
    return TRUE;
    }

if (version == entry->selectVer)
    {
    if (entry->clientFlags & ALIGN_FLAG)
        {
        struct outFa* dest = (entry->orgCat == GB_NATIVE) ? nativeFa : xenoFa;
        outFaWrite(dest, inFa);
        if (gbVerbose >= 3)
            gbVerbPr(3, "aligning %s %s", inFa->id,
                     gbOrgCatName(entry->orgCat));
        return TRUE;
        }
    if (entry->clientFlags & MIGRATE_FLAG)
        {
        /* flagged for migration: nothing is written, only traced */
        if (gbVerbose >= 3)
            gbVerbPr(3, "migrating %s %s", inFa->id,
                     gbOrgCatName(entry->orgCat));
        return TRUE;
        }
    }

/* the only remaining possibility should be a version mismatch */
assert(version != entry->selectVer);
if (gbVerbose >= 3)
    gbVerbPr(3, "skip %s, wrong version %s != %d", 
             gbOrgCatName(entry->orgCat), inFa->id,
             entry->selectVer);
return TRUE;
}
void checkOrgCat(struct gbEntry* entry, struct gbSelect* prevSelect)
/* Run the organism-category change checks for an entry: first against the
 * records that follow the head of its own processed list, then, when a
 * previous release is supplied and contains the same accession, against
 * that entry's processed list. */
{
struct slTime* alreadyReported = NULL;  /* shared by both checkOrgCats calls */
struct gbEntry* prior = NULL;

/* within this release: start at the record after the first one */
checkOrgCats(entry, entry->processed->next, &alreadyReported);

/* against the previous release, if there is one and it has this acc */
if (prevSelect != NULL)
    prior = gbReleaseFindEntry(prevSelect->release, entry->acc);
if (prior != NULL)
    checkOrgCats(entry, prior->processed, &alreadyReported);

slFreeList(&alreadyReported);
}
Beispiel #6
0
void getRequestedAccs(char* accFile, struct gbRelease* release,
                      struct hash* accTbl)
/* Load the accessions listed in accFile and, for each one that has an
 * entry in the release, pass the entry's processed list head to
 * selectAcc() to mark it for extraction.  Accessions without an entry are
 * silently skipped. */
{
struct slName* names = loadAccList(accFile);
struct slName* cur = names;

while (cur != NULL)
    {
    struct gbEntry* hit = gbReleaseFindEntry(release, cur->name);
    if (hit != NULL)
        selectAcc(hit->processed, accTbl, NULL);
    cur = cur->next;
    }

slFreeList(&names);
}
Beispiel #7
0
struct gbAligned* findPrevAligned(struct gbSelect* prevSelect,
                                  struct gbProcessed* processed)
/* Find the aligned record in the previous release that corresponds to the
 * accession and version of a processed record.  Returns NULL when the
 * previous release has no such entry or version, or when the organism
 * category recorded with the alignment differs from the entry's current
 * category (which can happen if organism aliases are added or the organism
 * name changed). */
{
struct gbAligned* hit;
struct gbEntry* prior = gbReleaseFindEntry(prevSelect->release,
                                           processed->entry->acc);

if (prior == NULL)
    return NULL;  /* accession not in previous release */

hit = gbEntryFindAlignedVer(prior, processed->version);
if (hit == NULL)
    return NULL;  /* this version was not aligned previously */

if (hit->alignOrgCat != processed->entry->orgCat)
    return NULL;  /* ignore due to category change */

return hit;
}
Beispiel #8
0
static void processSeq(struct gbSelect* select, struct gbFa* inFa)
/* Handle one sequence read from an update fasta file.  The sequence is
 * written to gOutFa when the release has an entry for its accession, the
 * version equals the entry's selected version, the entry has not already
 * been flagged as output, and the sequence passes isValidMrnaSeq().  A
 * sequence that fails validation produces a warning on stderr.  At
 * verbosity 3 or above, the decision is traced. */
{
char accBuf[GB_ACC_BUFSZ];
char hdrBuf[GB_ACC_BUFSZ];
char *newHdr = NULL;  /* NULL is passed through to gbFaWriteFromFa */
short ver = gbSplitAccVer(inFa->id, accBuf);

/* the lookup yields NULL for ignored sequences */
struct gbEntry* found = gbReleaseFindEntry(select->release, accBuf);
int wanted = (found != NULL) && (ver == found->selectVer)
    && !found->clientFlags;

if (wanted)
    {
    if (!isValidMrnaSeq(inFa))
        {
        fprintf(stderr, "warning: %s does not appear to be a valid mRNA sequence, skipped: %s:%d\n",
                inFa->id, inFa->fileName, inFa->recLineNum);
        }
    else
        {
        if (!gInclVersion)
            {
            /* header becomes "acc version" */
            safef(hdrBuf, sizeof(hdrBuf), "%s %d", accBuf, ver);
            newHdr = hdrBuf;
            }
        gbFaWriteFromFa(gOutFa, inFa, newHdr);
        found->clientFlags = TRUE;  /* ensures it is output only once */
        }
    }

/* everything below is tracing only */
if (gbVerbose < 3)
    return;

if (found == NULL)
    gbVerbPr(3, "no entry: %s.%d", accBuf, ver);
else if (found->selectVer <= 0)
    gbVerbPr(3, "not selected: %s.%d", accBuf, ver);
else if (ver != found->selectVer)
    gbVerbPr(3, "not version: %s.%d != %d", accBuf, ver, found->selectVer);
else
    gbVerbPr(3, "save: %s.%d", accBuf, ver);
}
void checkEst(struct gbRelease* mrnaRelease,
              struct gbEntry* entry,
              struct gbSelect* prevSelect)
/* Check an EST entry.  If the same accession also exists in the mRNA
 * release, report a type change; the direction is decided by which of the
 * two entries has the later modification date.  Then run the organism
 * category checks via checkOrgCat(). */
{
struct gbEntry* asMrna = gbReleaseFindEntry(mrnaRelease, entry->acc);

if (asMrna != NULL)
    {
    /* accession exists as both types; message is tab-separated in the
     * format used for ignore.idx */
    if (asMrna->processed->modDate <= entry->processed->modDate)
        gbError("%s\t%s\t%s\t%s changes type mRNA to EST",
                asMrna->acc, gbFormatDate(asMrna->processed->modDate),
                gbSrcDbName(mrnaRelease->srcDb),
                gbFormatDate(entry->processed->modDate));
    else
        gbError("%s\t%s\t%s\t%s changes type EST to mRNA",
                asMrna->acc, gbFormatDate(entry->processed->modDate),
                gbSrcDbName(mrnaRelease->srcDb),
                gbFormatDate(asMrna->processed->modDate));
    }

checkOrgCat(entry, prevSelect);
}
Beispiel #10
0
static void selectStatus(struct gbStatusTbl* statusTbl,
                         struct gbStatus* tmpStatus,
                         void* clientData)
/* Function called to determine if a status entry should be loaded.  This
 * compares the status parsed from the gbStatus file with the gbIndex.
 * Unchanged entries are not loaded into the table, decreasing the memory
 * required for incremental loads.
 *
 * statusTbl  - status table, passed on to the mark* functions
 * tmpStatus  - status record being examined
 * clientData - pointer to a struct selectStatusData
 *
 * The record is classified by exactly one of the mark* calls below.  Aborts
 * if the aligned version in the release is lower than the version in
 * tmpStatus.
 */
{
struct selectStatusData* ssData = clientData;
struct gbEntry* entry = gbReleaseFindEntry(ssData->select->release,
                                           tmpStatus->acc);
struct gbProcessed* processed = NULL;
struct gbAligned* aligned = NULL;
struct hashEl* seqAccEl = hashLookup(ssData->seqHash, tmpStatus->acc);

/* check if in seq table, record if found; a missing accession is counted
 * as an error in gErrorCnt but processing continues */
if (seqAccEl == NULL)
    {
    fprintf(stderr, "Error: %s is in gbStatus but not in gbSeq table\n",
            tmpStatus->acc);
    gErrorCnt++;
    }
else
    seqAccEl->val = (void*)TRUE;

/* processed and aligned stay NULL when there is no entry; the code below
 * relies on getProcAligned leaving aligned NULL when there is no alignment
 * (NOTE(review): confirm against getProcAligned) */
if (entry != NULL)
    processed = getProcAligned(entry, &aligned);
/* if no entry or not aligned, or if it shouldn't be included, delete */
if ((entry == NULL) || (aligned == NULL))
    markDeleted(statusTbl, tmpStatus, ssData);
else if (!loadNonCoding && (processed->molType != mol_mRNA))
    markIgnore(statusTbl, tmpStatus, entry);
else
    {
    /* validate entries are not going backwards; an older version is fatal,
     * an older modDate is only a warning */
    if (aligned->version < tmpStatus->version)
        errAbort("version for %s in release (%d) is less than one in database (%d)",
                 entry->acc, aligned->version, tmpStatus->version);
    if (processed->modDate < tmpStatus->modDate)
        {
        fprintf(stderr, "Warning: modDate for %s in release (%s) is before one in database (%s)\n",
                entry->acc, gbFormatDate(processed->modDate),
                gbFormatDate(tmpStatus->modDate));
        }
    /* flag updates as changed for later processing; the order of checks is
     * very important, since only the first matching case is applied:
     * version or alignment count change, then modDate change, then external
     * file release change, then rebuild of derived data, else no change. */
    if ((aligned->version > tmpStatus->version)
        || (aligned->numAligns != tmpStatus->numAligns))
        markSeqChanged(statusTbl, tmpStatus, processed, aligned);
    else if (processed->modDate != tmpStatus->modDate)
        markMetaChanged(ssData->select, statusTbl, tmpStatus, processed,
                        aligned);
    else if (statusTbl->extFileUpdate
             && !sameString(tmpStatus->extRelease,
                            ssData->select->release->version))
        markExtChanged(statusTbl, tmpStatus, processed, aligned);
    else if ((gOptions->flags & DBLOAD_REBUILD_DERIVED)
             && (entry->type == GB_MRNA))
        markRebuildDerived(statusTbl, tmpStatus, processed, aligned);
    else 
        markNoChange(statusTbl, tmpStatus, entry);
    }
}