Exemple #1
0
void gbAlignDataProcess(struct sqlConnection *conn, struct gbSelect* select,
                        struct gbStatusTbl* statusTbl)
/* Parse the alignment files for the partition described by select, looking
 * for accessions to add to the database.  The PSL file is mandatory and is
 * passed to processPslFile().  For native partitions the matching orientInfo
 * (.oi.gz) file is processed as well, and for native ESTs the intronPsl file
 * is processed when it is present.  Aborts if the PSL file is missing. */
{
char pslPath[PATH_LEN];
char oiPath[PATH_LEN];

gbAlignedGetPath(select, "psl.gz", NULL, pslPath);
/* shouldn't have called this function if there are no alignments counted */
if (!fileExists(pslPath))
    errAbort("PSL file does not exist, yet genbank index indicates that it should: %s",
             pslPath);

processPslFile(conn, select, statusTbl, pslPath);

/* load the associated orientInfo file if native */
if (select->orgCats == GB_NATIVE)
    {
    /* The OI path is the PSL path with the ".psl.gz" suffix swapped for
     * ".oi.gz".  Check the suffix before doing the pointer arithmetic that
     * relies on it. */
    assert(endsWith(pslPath, ".psl.gz"));
    strcpy(oiPath, pslPath);
    strcpy(oiPath + strlen(oiPath) - strlen(".psl.gz"), ".oi.gz");
    processOIFile(conn, select, statusTbl, oiPath);
    }

/* for native ESTs, we might have an intronPsl file */
if ((select->type == GB_EST) && (select->orgCats == GB_NATIVE))
    {
    char intronPslPath[PATH_LEN];
    gbAlignedGetPath(select, "intronPsl.gz", NULL, intronPslPath);
    if (fileExists(intronPslPath))
        processIntronPslFile(conn, select, statusTbl, intronPslPath);
    }
}
Exemple #2
0
void createAlignedIndex(struct gbSelect* select, char* alignIdx)
/* Write an alignment index for the processed entries of this update.  The
 * index path is stored in alignIdx.  The output is only closed here; it is
 * not renamed from its tmp file name.
 */
{
FILE *idxFh;
struct gbProcessed* cur;

/* set up the output index file */
gbAlignedGetPath(select, "alidx", NULL, alignIdx);
idxFh = gbMustOpenOutput(alignIdx);

/* walk every processed entry linked to this update */
for (cur = select->update->processed; cur != NULL; cur = cur->updateLink)
    {
    struct gbEntry* entry = cur->entry;
    struct gbAligned* aligned;
    int numAligns = 0;

    /* only entries flagged for migrate or align are indexed */
    if (!(entry->clientFlags & (MIGRATE_FLAG|ALIGN_FLAG)))
        continue;
    /* and only those in a selected organism category */
    if (!(entry->orgCat & select->orgCats))
        continue;

    /* entries with no aligned record for this version get a count of zero */
    aligned = gbEntryFindAlignedVer(entry, cur->version);
    if (aligned != NULL)
        numAligns = aligned->numAligns;
    gbAlignedWriteIdxRec(idxFh, entry->acc, cur->version, numAligns);
    }

carefulClose(&idxFh);
}
Exemple #3
0
void migratePsls(struct migrateAligns* migrate, unsigned pslFileType,
                 struct gbEntryCnts* counts, FILE* outPslFh)
/* Migrate selected PSL records of the given file type from the previous
 * release's file, if that file exists. */
{
char inPsl[PATH_LEN];
struct lineFile* lf;
struct psl* rec;

gbAlignedGetPath(migrate->prevSelect, gPslFileGzExt[pslFileType], NULL, inPsl);

/* A missing file is legitimate: it happens when none of the sequences
 * aligned, so there is simply nothing to migrate. */
if (!fileExists(inPsl))
    return;

gbVerbEnter(2, "migrating %ss from %s", gPslFileExt[pslFileType], inPsl);
lf = gzLineFileOpen(inPsl);
for (rec = pslNext(lf); rec != NULL; rec = pslNext(lf))
    {
    /* each record is handed to migratePsl() and then released */
    migratePsl(migrate, pslFileType, counts, rec, inPsl, outPslFh);
    pslFree(&rec);
    }
gzLineFileClose(&lf);
gbVerbLeave(2, "migrating %ss from %s", gPslFileExt[pslFileType], inPsl);
}
void outFaOpen(struct outFa* outFa)
/* Open the next fasta partition file in the work directory, reset the
 * sequence and base counters, and advance the partition number.  When
 * createPolyASizes is set, the matching polyA file is opened as well.
 * The fasta must not already be open. */
{
char faExt[64];
char faPath[PATH_LEN];
assert(outFa->fa == NULL);

/* fasta for this partition: <partNum>.fa */
safef(faExt, sizeof(faExt), "%d.fa", outFa->nextPartNum);
gbAlignedGetPath(&outFa->select, faExt, workDir, faPath);
outFa->fa = gbFaOpen(faPath, "w");
outFa->numSeqs = 0;
outFa->numBases = 0;

if (createPolyASizes)
    {
    /* companion file for this partition: <partNum>.polya */
    char polyAExt[64];
    char polyAPath[PATH_LEN];
    safef(polyAExt, sizeof(polyAExt), "%d.polya", outFa->nextPartNum);
    gbAlignedGetPath(&outFa->select, polyAExt, workDir, polyAPath);
    outFa->polyAFh = mustOpen(polyAPath, "w");
    }
outFa->nextPartNum++;
}
void markAligns(struct gbSelect* select, unsigned orgCat)
/* Create a marker file indicating that sequences either need to be aligned
 * or migrated for this partition.  It is used to determine what needs to be
 * installed after the alignment.  A separate marker is needed because all
 * of the sequences might be migrated, so the fasta can't be the
 * indicator. */
{
char markerPath[PATH_LEN];
FILE* markerFh;
unsigned savedOrgCats = select->orgCats;

/* temporarily narrow the selection to the requested organism category so
 * the path is generated for it */
select->orgCats = orgCat;
gbAlignedGetPath(select, "aligns", workDir, markerPath);

/* open the output and immediately install it; nothing is written to it */
markerFh = gbMustOpenOutput(markerPath);
gbOutputRename(markerPath, &markerFh);

select->orgCats = savedOrgCats;
}
Exemple #6
0
static void processOrgCatOi(struct gbSelect* select, unsigned orgCat)
/* Process the orientInfo file of an update for one organism category.  OIs
 * are only available for native, however this follows the structure of the
 * PSL code.  select->orgCats is set to orgCat for the duration of the call
 * and restored afterwards. */
{
char oiPath[PATH_LEN];
char *cols[EST_ORIENT_INFO_NUM_COLS];
struct lineFile* lf;
unsigned savedOrgCats = select->orgCats;

select->orgCats = orgCat;
gbAlignedGetPath(select, "oi.gz", NULL, oiPath);

lf = gzLineFileOpen(oiPath);
for (;;)
    {
    struct estOrientInfo* oi;
    /* stop once there are no more tab-separated rows */
    if (!lineFileNextRowTab(lf, cols, EST_ORIENT_INFO_NUM_COLS))
        break;
    oi = estOrientInfoLoad(cols);
    processOi(select, oi);
    estOrientInfoFree(&oi);
    }
gzLineFileClose(&lf);

select->orgCats = savedOrgCats;
}
Exemple #7
0
void copyIntronPsls(struct gbSelect* select, FILE* outPslFh,
                    struct recCounts* recCounts)
/* Copy the intron PSL file from the work directory, if it exists.  Each
 * record is passed to copyIntronPsl() and then freed. */
{
char inPsl[PATH_LEN];
struct lineFile* lf;
struct psl* rec;

gbAlignedGetPath(select, "intronPsl", gWorkDir, inPsl);

/* no file in the work directory means nothing to install */
if (!fileExists(inPsl))
    return;

gbVerbEnter(2, "installing from %s", inPsl);
lf = gzLineFileOpen(inPsl);
for (rec = pslNext(lf); rec != NULL; rec = pslNext(lf))
    {
    copyIntronPsl(select, rec, inPsl, outPslFh, recCounts);
    pslFree(&rec);
    }
gzLineFileClose(&lf);
gbVerbLeave(2, "installing from %s", inPsl);
}
Exemple #8
0
void copyPsls(struct gbSelect* select, unsigned pslFileType, FILE* outPslFh,
              struct gbEntryCnts* counts)
/* Copy the PSL file of the given type from the work directory, if it
 * exists, counting alignments for the index.  Each record is passed to
 * copyPsl() and then freed. */
{
char inPsl[PATH_LEN];
struct lineFile* lf;
struct psl* rec;

/* the file extension is selected by the PSL file type */
gbAlignedGetPath(select, gPslFileExt[pslFileType], gWorkDir, inPsl);

/* no file in the work directory means nothing to install */
if (!fileExists(inPsl))
    return;

gbVerbEnter(2, "installing from %s", inPsl);
lf = gzLineFileOpen(inPsl);
for (rec = pslNext(lf); rec != NULL; rec = pslNext(lf))
    {
    copyPsl(select, pslFileType, rec, inPsl, outPslFh, counts);
    pslFree(&rec);
    }
gzLineFileClose(&lf);
gbVerbLeave(2, "installing from %s", inPsl);
}
Exemple #9
0
void migrateOrientInfos(struct migrateAligns* migrate, FILE* outOiFh)
/* Migrate estOrientInfo records from the previous release's OI file, if
 * that file exists.  Each row is loaded, passed to migrateOrientInfo() and
 * then freed. */
{
char inOi[PATH_LEN];
struct lineFile* lf;
char *cols[EST_ORIENT_INFO_NUM_COLS];

gbAlignedGetPath(migrate->prevSelect, "oi.gz", NULL, inOi);

/* nothing to migrate when the previous file is absent */
if (!fileExists(inOi))
    return;

gbVerbEnter(2, "migrating from %s", inOi);
lf = gzLineFileOpen(inOi);
for (;;)
    {
    struct estOrientInfo *oi;
    /* stop once there are no more tab-separated rows */
    if (!lineFileNextRowTab(lf, cols, ArraySize(cols)))
        break;
    oi = estOrientInfoLoad(cols);
    migrateOrientInfo(migrate, oi, inOi, outOiFh);
    estOrientInfoFree(&oi);
    }
gzLineFileClose(&lf);
gbVerbLeave(2, "migrating from %s", inOi);
}
Exemple #10
0
void copyOrientInfos(struct gbSelect* select, FILE* outOiFh,
                     struct recCounts* recCounts)
/* Copy the OI file from the work directory, if it exists, counting
 * alignments for the index.  Each row is loaded, passed to copyOrientInfo()
 * and then freed. */
{
char inOi[PATH_LEN];
struct lineFile* lf;
char *cols[EST_ORIENT_INFO_NUM_COLS];

gbAlignedGetPath(select, "oi", gWorkDir, inOi);

/* no file in the work directory means nothing to install */
if (!fileExists(inOi))
    return;

gbVerbEnter(2, "installing from %s", inOi);
lf = gzLineFileOpen(inOi);
for (;;)
    {
    struct estOrientInfo *oi;
    /* stop once there are no more tab-separated rows */
    if (!lineFileNextRowTab(lf, cols, ArraySize(cols)))
        break;
    oi = estOrientInfoLoad(cols);
    copyOrientInfo(select, oi, inOi, outOiFh, recCounts);
    estOrientInfoFree(&oi);
    }
gzLineFileClose(&lf);
gbVerbLeave(2, "installing from %s", inOi);
}
Exemple #11
0
void installOrgCatAligned(struct gbSelect* select, unsigned orgCat,
                          struct gbSelect* prevSelect,
                          struct gbAlignInfo* alignInfo,
                          char* alignIdx)
/* Install alignments for either native or xeno.  The alignment index is
 * created and its path returned in alignIdx, but it is not renamed until
 * both native and xeno are processed.
 *
 * select->orgCats (and prevSelect->orgCats, when prevSelect is not NULL) are
 * set to orgCat for the duration of the call and each is restored to its
 * own prior value before returning.  prevSelect may be NULL, in which case
 * nothing is migrated from a previous release. */
{
unsigned holdOrgCats = select->orgCats;
/* prevSelect gets its own saved value so that restoring it cannot clobber
 * it with select's categories if the two differed on entry */
unsigned holdPrevOrgCats = (prevSelect != NULL) ? prevSelect->orgCats : 0;
struct outputFiles out;
struct recCounts recCounts;
ZeroVar(&out);
ZeroVar(&recCounts);

select->orgCats = orgCat;
if (prevSelect != NULL)
    prevSelect->orgCats = orgCat;

/* setup out PSL and orientInfo files */
gbAlignedGetPath(select, "psl.gz", NULL, out.psl.path);
out.psl.fh = openSortOutput(out.psl.path, PSL_SORT_SPEC);
if (select->orgCats == GB_NATIVE)
    {
    gbAlignedGetPath(select, "oi.gz", NULL, out.oi.path);
    out.oi.fh = openSortOutput(out.oi.path, OI_SORT_SPEC);
    if (select->type == GB_EST)
        {
        gbAlignedGetPath(select, "intronPsl.gz", NULL, out.intronPsl.path);
        out.intronPsl.fh = openSortOutput(out.intronPsl.path, PSL_SORT_SPEC);
        }
    }
if (select->type == GB_MRNA)
    {
    /* we don't bother sorting raw psl */
    gbAlignedGetPath(select, "rawPsl.gz", NULL, out.rawPsl.path);
    out.rawPsl.fh = gbMustOpenOutput(out.rawPsl.path);
    }

/* previous aligned if this is a full update */
if (prevSelect != NULL)
    migrateAligned(select, prevSelect, alignInfo, &out, &recCounts);

/* copy currently aligned, if they exist */
copyPsls(select, MAIN_PSL_FILE, out.psl.fh, &recCounts.pslCnts);
if (select->type == GB_MRNA)
    copyPsls(select, RAW_PSL_FILE, out.rawPsl.fh, &recCounts.rawPslCnts);
if ((select->orgCats == GB_NATIVE) && (recCounts.pslCnts.recTotalCnt > 0))
    {
    /* copy new OI and intronPsls */
    copyOrientInfos(select, out.oi.fh, &recCounts);
    if (select->type == GB_EST)
        copyPsls(select, INTRON_PSL_FILE, out.intronPsl.fh,
                 &recCounts.intronPslCnts);
    }

/* Install or remove files.  Done separately from the copy due to the
 * possibility of all being migrated.
 * NOTE(review): outputs that were never opened above still reach
 * gbOutputRemove() with the zeroed path/fh left by ZeroVar(); this assumes
 * gbOutputRemove() tolerates that -- confirm against its implementation. */
if (recCounts.intronPslCnts.recTotalCnt > 0)
    gbOutputRename(out.intronPsl.path, &out.intronPsl.fh);
else
    gbOutputRemove(out.intronPsl.path, &out.intronPsl.fh);

if (recCounts.oiCnts.recTotalCnt > 0)
    gbOutputRename(out.oi.path, &out.oi.fh);
else
    gbOutputRemove(out.oi.path, &out.oi.fh);

if (recCounts.rawPslCnts.recTotalCnt > 0)
    gbOutputRename(out.rawPsl.path, &out.rawPsl.fh);
else
    gbOutputRemove(out.rawPsl.path, &out.rawPsl.fh);

if (recCounts.pslCnts.recTotalCnt > 0)
    gbOutputRename(out.psl.path, &out.psl.fh);
else
    gbOutputRemove(out.psl.path, &out.psl.fh);

createAlignedIndex(select, alignIdx);

/* restore each selection to the categories it had on entry */
select->orgCats = holdOrgCats;
if (prevSelect != NULL)
    prevSelect->orgCats = holdPrevOrgCats;
}