Exemple #1
0
static void getFastaOffsets(struct brokenRefPepTbl *brpTbl,
                            struct sqlConnection *conn,
                            struct extFileTbl* extFileTbl,
                            char *faPath)
/* Scan the fasta file faPath and, for each record whose accession is in
 * brpTbl->protAccHash with a matching version and a matching newFaPath,
 * record the ext file id, record offset, sequence size and record size
 * in that table entry. */
{
char protAcc[GB_ACC_BUFSZ];
struct gbFa *faFile = gbFaOpen(faPath, "r");
HGID faExtId = extFileTblGet(extFileTbl, conn, faPath);

gbVerbMsg(5, "scanning fasta: %s", faPath);
while (gbFaReadNext(faFile))
    {
    short protVer;
    struct brokenRefPep *pep;

    gbVerbMsg(5, "   %s: %lld", faFile->id, (long long)faFile->recOff);

    /* save only if same accession, version, and file (to match mrna fa) */
    protVer = gbSplitAccVer(faFile->id, protAcc);
    pep = hashFindVal(brpTbl->protAccHash, protAcc);
    if (pep == NULL)
        continue;  /* accession not in the table */
    if (protVer != pep->protVer)
        continue;  /* different version */
    if (!sameString(faPath, pep->newFaPath))
        continue;  /* entry refers to a different fasta file */

    /* force read of sequence data; must happen before seqLen and off are
     * used below */
    gbFaGetSeq(faFile);
    pep->newFaId = faExtId;
    pep->newFaOff = faFile->recOff;
    pep->newSeqSize = faFile->seqLen;
    pep->newRecSize = faFile->off - faFile->recOff;
    /* NOTE(review): this is the only message in this function with a
     * trailing newline -- confirm whether gbVerbMsg already adds one */
    gbVerbMsg(5, "      save: %s %lld for %lld\n", faFile->id, (long long)faFile->recOff, (long long)faFile->off);
    }
gbFaClose(&faFile);
}
Exemple #2
0
void seqDataProcessUpdate(struct gbSelect* select)
/* Open the processed "fa" file for the selected partition and hand every
 * record in it to processSeq.  The partition's processed index should
 * already be loaded, with the selected versions flagged. */
{
char faPath[PATH_LEN];
struct gbFa* fa;

gbProcessedGetPath(select, "fa", faPath);
fa = gbFaOpen(faPath, "r");
for (;;)
    {
    if (!gbFaReadNext(fa))
        break;  /* no more records */
    processSeq(select, fa);
    }
gbFaClose(&fa);
}
boolean copyFastaRec(struct gbSelect* select, struct gbFa* inFa,
                     struct outFa* nativeFa, struct outFa* xenoFa)
/* Read the next record from inFa and, if it is selected for alignment,
 * copy it to nativeFa or xenoFa according to the entry's organism
 * category.  Records are skipped (with a level 3 verbose message) when
 * there is no entry for the accession, the sequence is shorter than
 * MIN_SEQ_SIZE, the entry is only flagged for migration, or the version
 * is not the selected one.
 * Returns FALSE at EOF, TRUE if a record was read, whether or not it was
 * copied. */
{
char acc[GB_ACC_BUFSZ];
unsigned version;
struct gbEntry* entry;
char* seq;
boolean isSelectedVer;

if (!gbFaReadNext(inFa))
    return FALSE; /* EOF */

version = gbSplitAccVer(inFa->id, acc);
entry = gbReleaseFindEntry(select->release, acc);
if (entry == NULL)
    {
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, no entry", inFa->id);
    return TRUE;
    }

seq = gbFaGetSeq(inFa);
isSelectedVer = (version == entry->selectVer);
if (strlen(seq) < MIN_SEQ_SIZE)
    {
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s, less than minimum sequence size", inFa->id);
    }
else if (isSelectedVer && (entry->clientFlags & ALIGN_FLAG))
    {
    outFaWrite(((entry->orgCat == GB_NATIVE) ? nativeFa : xenoFa),  inFa);
    if (gbVerbose >= 3)
        gbVerbPr(3, "aligning %s %s", inFa->id,
                 gbOrgCatName(entry->orgCat));
    }
else if (isSelectedVer && (entry->clientFlags & MIGRATE_FLAG))
    {
    /* nothing is written for a migrated entry here; only log it */
    if (gbVerbose >= 3)
        gbVerbPr(3, "migrating %s %s", inFa->id,
                 gbOrgCatName(entry->orgCat));
    }
else
    {
    /* a selected version is expected to carry ALIGN_FLAG or MIGRATE_FLAG,
     * so reaching here should mean a version mismatch */
    assert(!isSelectedVer);
    /* Report the record id first, then the category, like the other
     * messages in this function.  Previously the category name filled
     * "skip %s" and the whole id filled the version slot. */
    if (gbVerbose >= 3)
        gbVerbPr(3, "skip %s %s, wrong version %u != %d",
                 inFa->id, gbOrgCatName(entry->orgCat),
                 version, entry->selectVer);
    }

return TRUE;
}