Beispiel #1
0
void doOutGff(char *table, struct sqlConnection *conn, boolean outputGtf)
/* Save as GFF/GTF.
 *   table     - table whose items are written out.
 *   conn      - database connection.  The exonFrames lookup and the chrom
 *               pre-filter below are only attempted when this is non-NULL.
 *   outputGtf - handed straight through to bedToGffLines.
 * Prints NO_RESULTS when no lines were produced for any region. */
{
struct hTableInfo *hti = getHti(database, table, conn);
struct bed *bedList;
struct hash *chromHash = NULL;
struct slName *exonFramesList = NULL;
char source[HDB_MAX_TABLE_STRING];
/* Initialized here rather than inside the if (conn) block, so that the
 * accumulation and the final zero test are well defined when conn is NULL. */
int itemCount = 0;
struct region *region, *regionList = getRegions();

textOpen();

int efIdx = -1;   /* index of the exonFrames field, or -1 if not looked up */
safef(source, sizeof(source), "%s_%s", database, table);
if (conn)
    {
    boolean simpleTableExists = sqlTableExists(conn, table);
    // simpleTable means not split table, not custom track
    // However it still can include bbi table with bam fileName path
    if (simpleTableExists)  // no tables having exonFrames are split tables anyway
        efIdx = sqlFieldIndex(conn, table, "exonFrames");
    int regionCount = slCount(regionList);
    // regionList can have many thousands of items e.g. rheMac3 has 34000 chroms!
    // This regionCount threshold should be just above the # chroms in the latest human assembly
    if (simpleTableExists && (regionCount > 500))
        {
        chromHash = makeChromHashForTable(conn, table);
        }
    }
// Note: code could be added here to extract exonFrames from bigGenePred

// Process each region
for (region = regionList; region != NULL; region = region->next)
    {
    // When a chrom hash was built, skip regions whose chrom is not in it.
    if (chromHash && (!hashFindVal(chromHash, region->chrom)))
        continue;
    struct lm *lm = lmInit(64*1024);
    int fieldCount;
    bedList = cookedBedList(conn, table, region, lm, &fieldCount);
    // Use exonFrames field if available for better accuracy instead of calculating from coordinates
    // NOTE(review): the list from the previous region is overwritten here without
    // being released -- confirm who owns it.
    if (efIdx != -1)
        exonFramesList = getExonFrames(table, conn, bedList);
    itemCount += bedToGffLines(bedList, exonFramesList, hti, fieldCount, source, outputGtf);
    lmCleanup(&lm);
    }
if (itemCount == 0)
    hPrintf(NO_RESULTS);
}
Beispiel #2
0
void doGenomicDna(struct sqlConnection *conn)
/* Get genomic sequence (UI has already told us how).
 * Each selected region is loaded as a bed list and, when non-empty, handed
 * to hgSeqBed; the return values are summed.  NO_RESULTS is printed when
 * that sum is zero. */
{
struct region *regionList = getRegions();
struct hTableInfo *hti = getHti(database, curTable, conn);
struct region *region = regionList;
int fieldCount;   /* filled in by cookedBedList; not otherwise used here */
int resultCount = 0;

textOpen();
while (region != NULL)
    {
    /* Per-region local memory pool; everything in it goes away at lmCleanup. */
    struct lm *lm = lmInit(64*1024);
    struct bed *bedList = cookedBedList(conn, curTable, region, lm, &fieldCount);
    if (bedList != NULL)
        resultCount += hgSeqBed(database, hti, bedList);
    lmCleanup(&lm);
    region = region->next;
    }
if (resultCount == 0)
    hPrintf(NO_RESULTS);
}
Beispiel #3
0
boolean doGetBedOrCt(struct sqlConnection *conn, boolean doCt,
                     boolean doCtFile, boolean redirectToGb)
/* Actually output bed or custom track. Return TRUE unless no results.
 *   conn         - database connection used for all per-region loads.
 *   doCt         - when TRUE, results are collected into a new custom track
 *                  (ctNew) and saved to the cart instead of being printed.
 *   doCtFile     - in this function only contributes to doCtHdr.
 *   redirectToGb - when TRUE (and doCt produced results), emit a page that
 *                  redirects to the genome browser.
 * Works on the current table (curTable) over all regions from getRegions(). */
{
char *db = cloneString(database);
char *table = curTable;
struct hTableInfo *hti = getHti(db, table, conn);
struct featureBits *fbList = NULL, *fbPtr;
struct customTrack *ctNew = NULL;
/* A custom track structure is started when headers were asked for
 * explicitly or when either custom track mode is on. */
boolean doCtHdr = (cartUsualBoolean(cart, hgtaPrintCustomTrackHeaders, FALSE)
	|| doCt || doCtFile);
char *ctWigOutType = cartCgiUsualString(cart, hgtaCtWigOutType, outWigData);
char *fbQual = fbOptionsToQualifier();
char fbTQ[128];
int fields = hTableInfoBedFieldCount(hti);
boolean gotResults = FALSE;
struct region *region, *regionList = getRegions();
boolean isBedGr = isBedGraph(curTable);
boolean isBgWg = isBigWigTable(curTable);
boolean needSubtrackMerge = anySubtrackMerge(database, curTable);
boolean doDataPoints = FALSE;
boolean isWig = isWiggle(database, table);
struct wigAsciiData *wigDataList = NULL;
struct dataVector *dataVectorList = NULL;
boolean doRgb = bedItemRgb(hTrackDbForTrack(db, curTable));

/* Plain text output is opened unless we are producing GREAT output or
 * building a custom track. */
if (!cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE) && !doCt)
    {
    textOpen();
    }

/* For GREAT output a leading '#' is written before anything else. */
if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    fputs("#", stdout);

/* Data-point mode: a wiggle-like table and the requested wig output type
 * equals outWigData. */
if ((isWig || isBedGr || isBgWg) && sameString(outWigData, ctWigOutType))
    doDataPoints = TRUE;

for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *bedList = NULL, *bed;
    struct lm *lm = lmInit(64*1024);
    struct dataVector *dv = NULL;

    /* Load this region.  The data-point branches accumulate into
     * wigDataList / dataVectorList across regions; the remaining branches
     * produce a per-region bedList. */
    if (isWig && doDataPoints)
        {
        if (needSubtrackMerge)
            {
            dv = wiggleDataVector(curTrack, curTable, conn, region);
            if (dv != NULL)
                slAddHead(&dataVectorList, dv);
            }
        else
            {
            /* NOTE(review): count is incremented below but never read. */
            int count = 0;
            struct wigAsciiData *wigData = NULL;
            struct wigAsciiData *asciiData;
            struct wigAsciiData *next;

            wigData = getWiggleAsData(conn, curTable, region);
            /* Move every element with a non-zero count onto wigDataList;
             * next is saved first because slAddHead rewrites the link. */
            for (asciiData = wigData; asciiData; asciiData = next)
                {
                next = asciiData->next;
                if (asciiData->count)
                    {
                    slAddHead(&wigDataList, asciiData);
                    ++count;
                    }
                }
            /* NOTE(review): this reverses the whole accumulated list on every
             * region, not just the elements added for this region -- confirm
             * the resulting order is intended when there are several regions. */
            slReverse(&wigDataList);
            }
        }
    else if (isBedGr && doDataPoints)
        {
        dv = bedGraphDataVector(curTable, conn, region);
        if (dv != NULL)
            slAddHead(&dataVectorList, dv);
        }
    else if (isBgWg && doDataPoints)
        {
        dv = bigWigDataVector(curTable, conn, region);
        if (dv != NULL)
            slAddHead(&dataVectorList, dv);
        }
    else if (isWig || isBgWg)
        {
        /* Wiggle-like table but not data-point output: convert to beds. */
        dv = wiggleDataVector(curTrack, curTable, conn, region);
        bedList = dataVectorToBedList(dv);
        dataVectorFree(&dv);
        }
    else if (isBedGr)
        {
        bedList = getBedGraphAsBed(conn, curTable, region);
        }
    else
        {
        /* Ordinary table; cookedBedList also updates fields. */
        bedList = cookedBedList(conn, curTable, region, lm, &fields);
        }

    /*  this is a one-time only initial creation of the custom track
     *  structure to receive the results.  gotResults turns it off after
     *  the first time.
     */
    if (doCtHdr && !gotResults &&
	((bedList != NULL) || (wigDataList != NULL) ||
         (dataVectorList != NULL)))
        {
        ctNew = beginCustomTrack(table, fields,
                                 doCt, (isWig || isBedGr || isBgWg), doDataPoints);
        }

    /* In data-point mode the actual output happens after the loop; here we
     * only record that something was found. */
    if (doDataPoints && (wigDataList || dataVectorList))
        gotResults = TRUE;
    else
        {
        if ((fbQual == NULL) || (fbQual[0] == 0))
            {
            /* No feature-bits qualifier: emit (or collect) the beds as they are. */
            for (bed = bedList;  bed != NULL;  bed = bed->next)
                {
                if (bed->name != NULL)
                    {
                    /* Replace spaces in the name with underscores. */
                    subChar(bed->name, ' ', '_');
                    }
                if (doCt)
                    {
                    struct bed *dupe = cloneBed(bed); /* Out of local memory. */
                    slAddHead(&ctNew->bedList, dupe);
                    }
                else
                    {
                    if (doRgb)
                        bedTabOutNitemRgb(bed, fields, stdout);
                    else
                        bedTabOutN(bed, fields, stdout);
                    }

                gotResults = TRUE;
                }
            }
        else
            {
            /* Feature-bits qualifier present: convert the beds to feature
             * bits using "<rootName>:<qualifier>" and output those instead. */
            safef(fbTQ, sizeof(fbTQ), "%s:%s", hti->rootName, fbQual);
            fbList = fbFromBed(db, fbTQ, hti, bedList, 0, 0, FALSE, FALSE);
            /* Clamp the field count to one of 6, 4 or 3, the layouts that
             * the printing code below knows how to write. */
            if (fields >= 6)
                fields = 6;
            else if (fields >= 4)
                fields = 4;
            else
                fields = 3;
            if (doCt && ctNew)
                {
                ctNew->fieldCount = fields;
                /* Rewrites the type string in place, bounded by its current length. */
                safef(ctNew->tdb->type, strlen(ctNew->tdb->type)+1,
                      "bed %d", fields);
                }
            for (fbPtr=fbList;  fbPtr != NULL;  fbPtr=fbPtr->next)
                {
                if (fbPtr->name != NULL)
                    {
                    /* Truncate the name at its first space. */
                    char *ptr = strchr(fbPtr->name, ' ');
                    if (ptr != NULL)
                        *ptr = 0;
                    }
                if (doCt)
                    {
                    struct bed *fbBed = fbToBedOne(fbPtr);
                    slAddHead(&ctNew->bedList, fbBed );
                    }
                else
                    {
                    /* The score column is always written as 0 here. */
                    if (fields >= 6)
                        hPrintf("%s\t%d\t%d\t%s\t%d\t%c\n",
                                fbPtr->chrom, fbPtr->start, fbPtr->end, fbPtr->name,
                                0, fbPtr->strand);
                    else if (fields >= 4)
                        hPrintf("%s\t%d\t%d\t%s\n",
                                fbPtr->chrom, fbPtr->start, fbPtr->end, fbPtr->name);
                    else
                        hPrintf("%s\t%d\t%d\n",
                                fbPtr->chrom, fbPtr->start, fbPtr->end);
                    }
                gotResults = TRUE;
                }
            featureBitsFreeList(&fbList);
            }
        }
    bedList = NULL;
    lmCleanup(&lm);
    }
if (!gotResults)
    {
    hPrintf(NO_RESULTS);
    }
else if (doCt)
    {
    int wigDataSize = 0;
    /* Load existing custom tracks and add this new one: */
    struct customTrack *ctList = getCustomTracks();
    /* Drop any existing custom track with the same table name first. */
    removeNamedCustom(&ctList, ctNew->tdb->table);
    if (doDataPoints)
        {
        if (needSubtrackMerge || isBedGr || isBgWg)
            {
            /* Vectors were added at the head, so restore region order. */
            slReverse(&dataVectorList);
            wigDataSize = dataVectorWriteWigAscii(dataVectorList, ctNew->wigAscii, 0, NULL);
            // TODO: see if can make prettier wig output here that
            // doesn't necessarily have one value per base
            }
        else
            {
            struct wiggleDataStream *wds = NULL;
            /* create an otherwise empty wds so we can print out the list */
            wds = wiggleDataStreamNew();
            wds->ascii = wigDataList;
            wigDataSize = wds->asciiOut(wds, db, ctNew->wigAscii, TRUE, FALSE);
#if defined(DEBUG)    /*      dbg     */
            /* allow file readability for debug */
            chmod(ctNew->wigAscii, 0666);
#endif
            wiggleDataStreamFree(&wds);
            }
        }
    else
        /* Beds were added at the head, so put them back in load order. */
        slReverse(&ctNew->bedList);

    slAddHead(&ctList, ctNew);
    /* Save the custom tracks out to file (overwrite the old file): */
    customTracksSaveCart(db, cart, ctList);
    /*  Put up redirect-to-browser page. */
    if (redirectToGb)
        {
        char browserUrl[256];
        char headerText[512];
        int redirDelay = 3;   /* seconds, used in the META refresh and the message */
        safef(browserUrl, sizeof(browserUrl),
              "%s?%s&db=%s", hgTracksName(), cartSidUrlString(cart), database);
        safef(headerText, sizeof(headerText),
              "<META HTTP-EQUIV=\"REFRESH\" CONTENT=\"%d;URL=%s\">",
              redirDelay, browserUrl);
        webStartHeader(cart, database, headerText,
                       "Table Browser: %s %s: %s", hOrganism(database),
                       freezeName, "get custom track");
        if (doDataPoints)
            {
            hPrintf("There are %d data points in custom track. ", wigDataSize);
            }
        else
            {
            hPrintf("There are %d items in custom track. ",
                    slCount(ctNew->bedList));
            }
        hPrintf("You will be automatically redirected to the genome browser in\n"
                "%d seconds, or you can \n"
                "<A HREF=\"%s\">click here to continue</A>.\n",
                redirDelay, browserUrl);
        }
    }
else if (doDataPoints)
    {
    /* Not a custom track: write the accumulated data points, passing
     * "stdout" as the output file name. */
    if (needSubtrackMerge || isBedGr || isBgWg)
        {
        slReverse(&dataVectorList);
        dataVectorWriteWigAscii(dataVectorList, "stdout", 0, NULL);
        }
    else
        {
        /* create an otherwise empty wds so we can print out the list */
        struct wiggleDataStream *wds = NULL;
        wds = wiggleDataStreamNew();
        wds->ascii = wigDataList;
        wds->asciiOut(wds, db, "stdout", TRUE, FALSE);
        wiggleDataStreamFree(&wds);
        }
    }
return gotResults;
}
Beispiel #4
0
void doSummaryStatsBed(struct sqlConnection *conn)
/* Put up page showing summary stats for track that is in database
 * or that is bed-format custom. */
{
    struct bed *bedList = NULL;
    struct region *regionList = getRegions(), *region;
    char *regionName = getRegionName();
    long long regionSize = 0, gapTotal = 0, realSize = 0;
    long startTime, midTime, endTime;
    long loadTime = 0, calcTime = 0, freeTime = 0;
    struct covStats *itemCovList = NULL, *blockCovList = NULL, *cov;
    int itemCount = 0;
    struct hTableInfo *hti = getHti(database, curTable, conn);
    int minScore = BIGNUM, maxScore = -BIGNUM;
    long long sumScores = 0;
    boolean hasBlocks = hti->hasBlocks;
    boolean hasScore = (hti->scoreField[0] != 0);
    int fieldCount;

    htmlOpen("%s (%s) Summary Statistics", curTableLabel(), curTable);

    for (region = regionList; region != NULL; region = region->next)
    {
        struct lm *lm = lmInit(64*1024);
        startTime = clock1000();
        bedList = cookedBedList(conn, curTable, region, lm, &fieldCount);
        if (fieldCount < 12)
            hasBlocks = FALSE;
        if (fieldCount < 5)
            hasScore = FALSE;
        midTime = clock1000();
        loadTime += midTime - startTime;

        if (bedList != NULL)
        {
            itemCount += slCount(bedList);
            regionSize += region->end - region->start;
            cov = calcSpanOverRegion(region, bedList);
            slAddHead(&itemCovList, cov);
            if (hasBlocks)
            {
                cov = calcBlocksOverRegion(region, bedList);
                slAddHead(&blockCovList, cov);
            }
            if (hti->scoreField[0] != 0)
            {
                struct bed *bed;
                for (bed = bedList; bed != NULL; bed = bed->next)
                {
                    int score = bed->score;
                    if (score < minScore) minScore = score;
                    if (score > maxScore) maxScore = score;
                    sumScores += score;
                }
            }
        }
        endTime = clock1000();
        calcTime += endTime - midTime;
        lmCleanup(&lm);
        bedList = NULL;
        freeTime  += clock1000() - endTime;
    }

    regionSize = basesInRegion(regionList, 0);
    gapTotal = gapsInRegion(conn, regionList, 0);
    realSize = regionSize - gapTotal;


    hTableStart();
    startTime = clock1000();
    numberStatRow("item count", itemCount);
    if (itemCount > 0)
    {
        cov = covStatsSum(itemCovList);
        percentStatRow("item bases", cov->basesCovered, realSize);
        percentStatRow("item total", cov->sumBases, realSize);
        numberStatRow("smallest item", cov->minBases);
        numberStatRow("average item", round((double)cov->sumBases/cov->itemCount));
        numberStatRow("biggest item", cov->maxBases);
    }

    if (hasBlocks && itemCount > 0)
    {
        cov = covStatsSum(blockCovList);
        hPrintf("<TR><TD>block count</TD><TD ALIGN=RIGHT>");
        printLongWithCommas(stdout, cov->itemCount);
        hPrintf("</TD></TR>\n");
        percentStatRow("block bases", cov->basesCovered, realSize);
        percentStatRow("block total", cov->sumBases, realSize);
        numberStatRow("smallest block", cov->minBases);
        numberStatRow("average block", round((double)cov->sumBases/cov->itemCount));
        numberStatRow("biggest block", cov->maxBases);
    }

    if (hasScore != 0 && itemCount > 0 && sumScores != 0)
    {
        numberStatRow("smallest score", minScore);
        numberStatRow("average score", round((double)sumScores/itemCount));
        numberStatRow("biggest score", maxScore);
    }
    hTableEnd();

    /* Show region and time stats part of stats page. */
    webNewSection("Region and Timing Statistics");
    hTableStart();
    stringStatRow("region", regionName);
    numberStatRow("bases in region", regionSize);
    numberStatRow("bases in gaps", gapTotal);
    floatStatRow("load time", 0.001*loadTime);
    floatStatRow("calculation time", 0.001*calcTime);
    floatStatRow("free memory time", 0.001*freeTime);
    stringStatRow("filter", (anyFilter() ? "on" : "off"));
    stringStatRow("intersection", (anyIntersection() ? "on" : "off"));
    hTableEnd();
    covStatsFreeList(&itemCovList);
    covStatsFreeList(&blockCovList);
    htmlClose();
}