Пример #1
0
struct annoRow *annoGratorIntegrate(struct annoGrator *self, struct annoStreamRows *primaryData,
                                    boolean *retRJFilterFailed, struct lm *callerLm)
/* Collect the rows of the internal source (self's queue) that overlap the first row of
 * primaryData->rowList, and return clones of them (cloned with callerLm) in queue order.
 *
 * retRJFilterFailed is optional.  When it is non-NULL it is first cleared to FALSE and
 * then set to TRUE if any of the following holds:
 * - an overlapping row has rightJoinFail set (scanning stops at that row, which is still
 *   included in the returned list), or
 * - no rows overlap and either self->haveRJIncludeFilter is set or the overlap rule is
 *   agoMustOverlap, or
 * - at least one row overlaps and the overlap rule is agoMustNotOverlap.
 * When it is NULL, nothing is reported and every overlapping row is returned. */
{
struct annoRow *primary = primaryData->rowList;
agCheckPrimarySorting(self, primary);

// A zero-length primary item (e.g. an insertion) would never satisfy the strict overlap
// test below against another zero-length item, so widen the window by one base per side.
char *chrom = primary->chrom;
int winStart = primary->start;
int winEnd = primary->end;
if (winStart == winEnd)
    {
    winStart -= 1;
    winEnd += 1;
    }

// Bring the internal queue (self->qHead) up to date for this window.
agTrimToStart(self, chrom, winStart);
agFetchToEnd(self, chrom, winStart, winEnd);

boolean reportFailure = (retRJFilterFailed != NULL);
if (reportFailure)
    *retRJFilterFailed = FALSE;

struct annoRow *overlapping = NULL;
struct annoRow *candidate;
for (candidate = self->qHead;  candidate != NULL;  candidate = candidate->next)
    {
    // Skip anything that lies outside the window or on a different chromosome.
    if (candidate->start >= winEnd || candidate->end <= winStart
        || !sameString(candidate->chrom, chrom))
        continue;
    struct annoRow *copy = annoRowClone(candidate, self->mySource->rowType,
                                        self->mySource->numCols, callerLm);
    slAddHead(&overlapping, copy);
    if (reportFailure && candidate->rightJoinFail)
        {
        // Hard failure: the caller will discard the primary item, so stop looking.
        *retRJFilterFailed = TRUE;
        break;
        }
    }
// Rows were pushed onto the head, so restore queue order.
slReverse(&overlapping);

if (reportFailure)
    {
    if (overlapping == NULL)
        {
        // Nothing overlapped, but an include-type right-join filter or a must-overlap
        // rule requires at least one overlapping row to keep the primary item.
        if (self->haveRJIncludeFilter || self->overlapRule == agoMustOverlap)
            *retRJFilterFailed = TRUE;
        }
    else if (self->overlapRule == agoMustNotOverlap)
        {
        // Something overlapped although the rule forbids any overlap.
        *retRJFilterFailed = TRUE;
        }
    }
return overlapping;
}
Пример #2
0
static struct annoRow *nextQueuedRow(struct annoStreamDb *self, struct lm *callerLm)
// Return the head of either bigItemQueue or smallItemQueue, depending on which has
// the lower chromStart.
{
struct annoRow *row = NULL;
if (self->bigItemQueue && annoRowCmp(&(self->bigItemQueue), &(self->smallItemQueue)) < 0)
    row = slPopHead(&(self->bigItemQueue));
else
    row = slPopHead(&(self->smallItemQueue));
if (self->bigItemQueue == NULL && self->smallItemQueue == NULL)
    // All done merge-sorting, just stream finest-bin items from here on out.
    self->mergeBins = FALSE;
enum annoRowType rowType = self->streamer.rowType;
int numCols = self->streamer.numCols;
return annoRowClone(row, rowType, numCols, callerLm);
}
Пример #3
0
static struct annoRow *mergeRow(struct annoStreamDb *self, struct annoRow *aRow,
                                struct lm *callerLm)
/* Choose between the head of bigItemQueue and the (finest-bin) row aRow: whichever
 * annoRowCmp orders first is returned as a clone made with callerLm.  When the queued
 * big item wins, it is popped and aRow is parked on smallItemQueue for a later call.
 * When bigItemQueue is empty, the merge state is reset and aRow itself is returned. */
{
struct annoRow *winner = aRow;
if (self->bigItemQueue != NULL)
    {
    if (annoRowCmp(&(self->bigItemQueue), &aRow) < 0)
        {
        // The coarse-bin item sorts first; emit it now and keep aRow for next time.
        winner = slPopHead(&(self->bigItemQueue));
        slAddHead(&(self->smallItemQueue), aRow);
        }
    }
else
    {
    // Nothing left to merge against, so leave merge mode.
    // NOTE(review): aRow is cloned only after this call; if resetMergeState releases the
    // memory aRow was allocated from, the clone below would read freed memory -- confirm.
    resetMergeState(self);
    }
return annoRowClone(winner, self->streamer.rowType, self->streamer.numCols, callerLm);
}
Пример #4
0
static struct annoRow *nextRowMergeBins(struct annoStreamDb *self, char *minChrom, uint minEnd,
                                        struct lm *callerLm)
/* Produce the next output row while merge-sorting coarse-bin items (held in
 * bigItemQueue) with finest-bin items fetched via nextRowFiltered, so that rows come out
 * in annoRowCmp order.  Requires self->mergeBins and self->hasBin to be set.
 * The returned row is a clone made with callerLm. */
{
assert(self->mergeBins && self->hasBin);

// A parked small item means merging is already under way: serve from the queues rather
// than fetching another row (intended to keep smallItemQueue at depth 1 or less).
if (self->smallItemQueue != NULL)
    return nextQueuedRow(self, callerLm);

// Either we still need to gather the initial coarse-bin items, or we are mid-merge.
boolean rjFail = FALSE;
char **fetched = nextRowFiltered(self, &rjFail, minChrom, minEnd);
if (fetched != NULL && !self->gotFinestBin)
    {
    // Just starting: getFinestBinItem is expected to queue coarse-bin items until the
    // first finest-bin item shows up, and to return that item (or NULL).
    fetched = getFinestBinItem(self, fetched, &rjFail, minChrom, minEnd);
    }

if (fetched == NULL)
    {
    // No more fetched rows; drain whatever is left in the coarse-bin queue.
    struct annoRow *queued = slPopHead(&(self->bigItemQueue));
    return annoRowClone(queued, self->streamer.rowType, self->streamer.numCols, callerLm);
    }

// Merge the new finest-bin row with the head of the coarse-bin queue.
struct annoRow *fineRow = rowToAnnoRow(self, fetched, rjFail, self->qLm);
return mergeRow(self, fineRow, callerLm);
}