Ejemplo n.º 1
0
static void processOi(struct gbSelect* select, struct estOrientInfo* oi)
/* Handle one orientInfo record read from an update OI file.  The record is
 * written to gOutOi when its accession has an entry in the release and its
 * version equals the version selected for that entry. */
{
char accession[GB_ACC_BUFSZ];
short ver = gbSplitAccVer(oi->name, accession);

/* lookup yields NULL for ignored sequences */
struct gbEntry* found = gbReleaseFindEntry(select->release, accession);
int isSelected = (found != NULL) && (ver == found->selectVer);

if (isSelected)
    {
    /* unless versions are requested, output the bare accession as the name */
    if (!gInclVersion)
        strcpy(oi->name, accession);
    estOrientInfoTabOut(oi, gOutOi);
    found->clientFlags = TRUE;  /* remember that this entry was obtained */
    }

/* everything below is tracing only */
if (gbVerbose < 3)
    return;

if (found == NULL)
    gbVerbPr(3, "no entry: %s.%d", accession, ver);
else if (found->selectVer <= 0)
    gbVerbPr(3, "not selected: %s.%d", accession, ver);
else if (ver != found->selectVer)
    gbVerbPr(3, "not version: %s.%d != %d", accession, ver, found->selectVer);
else
    gbVerbPr(3, "save: %s.%d", accession, ver);
}
Ejemplo n.º 2
0
static void checkNewEntry(struct gbSelect* select, struct gbStatusTbl* statusTbl,
                          struct gbEntry* entry)
/* check if an entry is new */
{
if (entry->selectVer == NULL_VERSION)
    {
    /* new entry, get the alignment.  However if the processed directory
     * has not been aligned yet, it might not exist, in which case, it's
     * ignored.*/
    struct gbAligned* aligned = NULL;
    struct gbProcessed* processed = getProcAligned(entry, &aligned);
    if (!loadNonCoding && (processed != NULL) && (processed->molType != mol_mRNA))
        gbVerbPr(5, "nonCoding: %s.%d %s", entry->acc, entry->processed->version, gbMolTypeSym(processed->molType));
    else if (aligned != NULL)
        {
        struct gbStatus* status
            = gbStatusTblAdd(statusTbl, entry->acc,
                             aligned->version, processed->modDate,
                             entry->type, select->release->srcDb,
                             entry->orgCat, 0, 0,
                             aligned->update->release->version,
                             aligned->update->shortName, 0);
        markNew(statusTbl, status, processed, aligned);
        }
    else if (gbVerbose >= 5)
        {
        gbVerbPr(5, "notAligned: %s.%d %s", entry->acc, entry->processed->version,
                 gbFormatDate(entry->processed->modDate));
        }
    }
}
boolean copyFastaRec(struct gbSelect* select, struct gbFa* inFa,
                     struct outFa* nativeFa, struct outFa* xenoFa)
/* Read the next record from inFa and, if it is selected for alignment, copy
 * it to nativeFa or xenoFa according to the entry's organism category.
 * Returns FALSE when no further record could be read, TRUE otherwise. */
{
char acc[GB_ACC_BUFSZ];
unsigned version;
struct gbEntry* entry;
struct outFa* dest;
char* seq;
int sameVer;

if (!gbFaReadNext(inFa))
    return FALSE; /* EOF */

version = gbSplitAccVer(inFa->id, acc);
entry = gbReleaseFindEntry(select->release, acc);
if (entry == NULL)
    {
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, no entry", inFa->id);
    return TRUE;
    }

seq = gbFaGetSeq(inFa);
if (strlen(seq) < MIN_SEQ_SIZE)
    {
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, less than minimum sequence size", inFa->id);
    return TRUE;
    }

sameVer = (version == entry->selectVer);
if (sameVer && (entry->clientFlags & ALIGN_FLAG))
    {
    /* needs alignment: route to the file for its organism category */
    if (entry->orgCat == GB_NATIVE)
        dest = nativeFa;
    else
        dest = xenoFa;
    outFaWrite(dest, inFa);
    if (gbVerbose >= 3)
        gbVerbPr(3, "aligning %s %s", inFa->id,
                 gbOrgCatName(entry->orgCat));
    }
else if (sameVer && (entry->clientFlags & MIGRATE_FLAG))
    {
    /* flagged for migration: nothing is written here, only traced */
    if (gbVerbose >= 3)
        gbVerbPr(3, "migrating %s %s", inFa->id,
                 gbOrgCatName(entry->orgCat));
    }
else
    {
    /* a matching version is expected to carry one of the two flags */
    assert(version != entry->selectVer);
    /* NOTE(review): here the first %s receives the organism category and the
     * second the sequence id, unlike the other "skip" messages -- confirm
     * this ordering is intended. */
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, wrong version %s != %d", 
                 gbOrgCatName(entry->orgCat), inFa->id,
                 entry->selectVer);
    }
return TRUE;
}
Ejemplo n.º 4
0
void copyPsl(struct gbSelect* select, unsigned pslFileType,
             struct psl* psl, char* inPsl,
             FILE* outPslFh, struct gbEntryCnts* counts)
/* Copy a PSL to outPslFh and update the counts.  Reports an error through
 * errAbort if no entry is found for the PSL's query accession. */
{
char acc[GB_ACC_BUFSZ];
int version = gbSplitAccVer(psl->qName, acc);
struct gbEntry* entry = getEntry(select, acc, inPsl);
struct gbAligned* aligned;
int accIncr = 0;  /* becomes 1 only for the first main PSL of an accession */

if (entry == NULL)
    errAbort("no entry for %s %s in %s", gPslFileExt[pslFileType],
             psl->qName, inPsl);

aligned = gbEntryGetAligned(entry, select->update, version, NULL);
pslTabOut(psl, outPslFh);

/* Only main PSLs are counted in the index; rawPsl and intronPsl just add to
 * the PSL count. */
if (pslFileType == MAIN_PSL_FILE)
    {
    gbAlignedCount(aligned, 1);
    if (aligned->numAligns == 1)
        accIncr = 1;
    }
gbCountNeedAligned(counts, entry, accIncr, 1);

if (gbVerbose >= 3)
    gbVerbPr(3, "installing %s %s %s.%d", gbOrgCatName(entry->orgCat),
             gPslFileExt[pslFileType], acc, version);
}
Ejemplo n.º 5
0
void migratePsl(struct migrateAligns* migrate, unsigned pslFileType,
                struct gbEntryCnts* counts, struct psl* psl,
                char* inPsl, FILE* outPslFh)
/* Migrate a PSL: when getMigrateAligned returns an aligned object for its
 * query name, write the PSL to outPslFh and update the counts. */
{
struct gbAligned* aln = getMigrateAligned(migrate, psl->qName, inPsl);
int accIncr = 0;  /* becomes 1 only for the first main PSL of an accession */

if (aln == NULL)
    return;  /* not being migrated */

pslTabOut(psl, outPslFh);

/* Only main PSLs are counted in the index; rawPsl and intronPsl just add to
 * the PSL count. */
if (pslFileType == MAIN_PSL_FILE)
    {
    gbAlignedCount(aln, 1);
    if (aln->numAligns == 1)
        accIncr = 1;
    }
gbCountNeedAligned(counts, aln->entry, accIncr, 1);

if (gbVerbose >= 3)
    gbVerbPr(3, "migrating %s %s %s",
             gbOrgCatName(aln->entry->orgCat), 
             gPslFileExt[pslFileType], psl->qName);
}
Ejemplo n.º 6
0
static void endStep(struct gbIndex* index,
                    struct stepInfo* info)
/* Print the end-of-step message: the number of entries added since the
 * count recorded in info, and the current total. */
{
int total = getNumEntries(index);
int added = total - info->startNumEntries;

gbVerbPr(0, "end %s: acc-added=%d, acc-total=%d ", info->step, added, total);
}
Ejemplo n.º 7
0
static void processSeq(struct gbSelect* select, struct gbFa* inFa)
/* Handle one sequence read from an update fasta file.  The sequence is
 * written to gOutFa when its entry exists, its version is the selected one,
 * the entry has not been flagged yet, and isValidMrnaSeq accepts it. */
{
char acc[GB_ACC_BUFSZ], commentBuf[GB_ACC_BUFSZ], *comment = NULL;
short ver = gbSplitAccVer(inFa->id, acc);

/* lookup yields NULL for ignored sequences */
struct gbEntry* found = gbReleaseFindEntry(select->release, acc);
int wanted = (found != NULL) && (ver == found->selectVer) && !found->clientFlags;

if (wanted && !isValidMrnaSeq(inFa))
    {
    fprintf(stderr, "warning: %s does not appear to be a valid mRNA sequence, skipped: %s:%d\n",
            inFa->id, inFa->fileName, inFa->recLineNum);
    }
else if (wanted)
    {
    if (!gInclVersion)
        {
        /* build an "acc version" header to pass to the writer */
        safef(commentBuf, sizeof(commentBuf), "%s %d", acc, ver);
        comment = commentBuf;
        }
    gbFaWriteFromFa(gOutFa, inFa, comment);
    found->clientFlags = TRUE;  /* flag so it is only written once */
    }

/* everything below is tracing only */
if (gbVerbose < 3)
    return;

if (found == NULL)
    gbVerbPr(3, "no entry: %s.%d", acc, ver);
else if (found->selectVer <= 0)
    gbVerbPr(3, "not selected: %s.%d", acc, ver);
else if (ver != found->selectVer)
    gbVerbPr(3, "not version: %s.%d != %d", acc, ver, found->selectVer);
else
    gbVerbPr(3, "save: %s.%d", acc, ver);
}
Ejemplo n.º 8
0
static void refPepDropOne(struct sqlConnection *conn,
                          struct brokenRefPep *brp,
                          struct sqlDeleter* seqTblDeleter,
                          boolean dryRun)
/* Drop a refPep: log the drop and, unless this is a dry run, add its protein
 * accession to seqTblDeleter. */
{
gbVerbPr(2, "%s\t%s\tdrop", sqlGetDatabase(conn), brp->protAcc);
if (dryRun)
    return;  /* only report what would be dropped */
sqlDeleterAddAcc(seqTblDeleter, brp->protAcc);
}
Ejemplo n.º 9
0
static void flagNeedAligned(struct gbSelect* select,
                            struct gbSelect* prevSelect,
                            struct gbProcessed* processed,
                            struct gbAlignInfo* alignInfo)
/* Called for each sequence to set the alignment and migrate flags.  The
 * migrate flag is set on both the previous and the current entry; the align
 * flag is set only on the current entry. */
{
struct gbAligned* prevAln = NULL;
struct gbEntry* target;
int doMigrate;

if (prevSelect != NULL)
    prevAln = findPrevAligned(prevSelect, processed);

/* Migrate when the sequence was aligned in the previous release and
 * canMigrate accepts it; otherwise the entry is marked for alignment. */
doMigrate = (prevAln != NULL) && canMigrate(processed, prevAln);

if (!doMigrate)
    {
    target = processed->entry;
    target->selectVer = processed->version;
    target->clientFlags |= ALIGN_FLAG;
    processed->update->selectProc |= target->orgCat;
    gbCountNeedAligned(&alignInfo->align, target, 1, 0);
    if (gbVerbose >= 3)
        gbVerbPr(3, "align %s %s.%d", gbOrgCatName(target->orgCat),
                 target->acc, processed->version);
    return;
    }

target = prevAln->entry;
target->selectVer = prevAln->version;
target->clientFlags |= MIGRATE_FLAG;
processed->entry->clientFlags |= MIGRATE_FLAG;
prevAln->update->selectAlign |= target->orgCat;
gbCountNeedAligned(&alignInfo->migrate, target, 1, prevAln->numAligns);
if (gbVerbose >= 3)
    gbVerbPr(3, "migrate %s %s.%d %d psls", 
             gbOrgCatName(target->orgCat), target->acc,
             prevAln->version, prevAln->numAligns);
}
Ejemplo n.º 10
0
void migrateOrientInfo(struct migrateAligns* migrate,
                       struct estOrientInfo* oi, char* inOi, FILE* outOiFh)
/* Migrate an estOrientInfo row: when getMigrateAligned returns an aligned
 * object for its name, write the row to outOiFh and count it. */
{
struct gbAligned* aln = getMigrateAligned(migrate, oi->name, inOi);

if (aln == NULL)
    return;  /* not being migrated */

if (gbVerbose >= 3)
    gbVerbPr(3, "migrating %s oi %s",
             gbOrgCatName(aln->entry->orgCat), oi->name);
estOrientInfoTabOut(oi, outOiFh);

/* only the record count is incremented */
gbCountNeedAligned(&migrate->counts.oiCnts, aln->entry, 0, 1);
}
Ejemplo n.º 11
0
void copyIntronPsl(struct gbSelect* select, struct psl* psl, char* inPsl,
                   FILE* outPslFh, struct recCounts* recCounts)
/* Copy an intronPsl to outPslFh and count it, provided getEntry finds an
 * entry for its query accession. */
{
char accession[GB_ACC_BUFSZ];
struct gbEntry* found;

/* only the accession is needed; the version is discarded */
gbSplitAccVer(psl->qName, accession);
found = getEntry(select, accession, inPsl);
if (found == NULL)
    return;

if (gbVerbose >= 3)
    gbVerbPr(3, "installing %s intronPsl %s", gbOrgCatName(found->orgCat),
             psl->qName);
pslTabOut(psl, outPslFh);

/* only the record count is incremented */
gbCountNeedAligned(&recCounts->intronPslCnts, found, 0, 1);
}
Ejemplo n.º 12
0
void copyOrientInfo(struct gbSelect* select, struct estOrientInfo* oi,
                    char* inOi, FILE* outOiFh, struct recCounts* recCounts)
/* Copy an orientInfo record to outOiFh and count it, provided getEntry finds
 * an entry for its accession. */
{
char accession[GB_ACC_BUFSZ];
struct gbEntry* found;

/* only the accession is needed; the version is discarded */
gbSplitAccVer(oi->name, accession);
found = getEntry(select, accession, inOi);
if (found == NULL)
    return;

if (gbVerbose >= 3)
    gbVerbPr(3, "installing %s oi %s", gbOrgCatName(found->orgCat),
             oi->name);
estOrientInfoTabOut(oi, outOiFh);

/* only the record count is incremented */
gbCountNeedAligned(&recCounts->oiCnts, found, 0, 1);
}
Ejemplo n.º 13
0
static void raInfoAdd(struct raInfoTbl *rit, struct hash *raRec,
                      char *acc, short ver, char *seqSzFld, char *offFld, char *recSzFld,
                      unsigned extFileId)
/* Add a ra mrna or pep record to rit->accMap, keyed by "acc.ver".
 *   raRec     - hash holding the fields of the ra record
 *   acc, ver  - accession and version of the sequence
 *   seqSzFld  - name of the raRec field stored as the sequence size
 *   offFld    - name of the raRec field stored as the offset
 *   recSzFld  - name of the raRec field stored as the file size
 *   extFileId - external file id; a value of 0 is reported via errAbort */
{
    struct raInfo *ri;
    char accVer[GB_ACC_BUFSZ];
    if (extFileId == 0)
        errAbort("no extFileId for %s.%d", acc, ver);
    /* extFileId is unsigned, so it is printed with %u */
    gbVerbPr(10, "raAdd %s.%d ext %u", acc, ver, extFileId);

    /* the record and its accession copy are allocated from the hash's lm */
    lmAllocVar(rit->accMap->lm, ri);
    safef(accVer, sizeof(accVer), "%s.%d", acc, ver);
    hashAdd(rit->accMap, accVer, ri);

    ri->acc = lmCloneString(rit->accMap->lm, acc);
    ri->version = ver;
    ri->size = sqlUnsigned((char*)hashMustFindVal(raRec, seqSzFld));
    ri->offset = sqlLongLong((char*)hashMustFindVal(raRec, offFld));
    ri->fileSize = sqlUnsigned((char*)hashMustFindVal(raRec, recSzFld));
    ri->extFileId = extFileId;
}