struct annoRow *annoGratorIntegrate(struct annoGrator *self, struct annoStreamRows *primaryData,
                                    boolean *retRJFilterFailed, struct lm *callerLm)
/* Take the first row of primaryData->rowList as the primary item, collect every row in
 * self's internal queue that overlaps it, and return clones of those rows (allocated via
 * callerLm) in queue order.  Returns NULL when nothing overlaps.
 * When retRJFilterFailed is non-NULL it is first cleared, then set to TRUE if:
 * - an overlapping row has rightJoinFail set (collection stops at that row), or
 * - nothing overlaps and self has a right-join include filter or the overlap rule is
 *   agoMustOverlap, or
 * - something overlaps and the overlap rule is agoMustNotOverlap. */
{
struct annoRow *primaryRow = primaryData->rowList;
struct annoRow *overlapList = NULL;
agCheckPrimarySorting(self, primaryRow);

// Two zero-length items (e.g. two insertions) at the same position would never satisfy
// the strict overlap test below, so pad an empty primary range by one base on each side.
int pStart = primaryRow->start;
int pEnd = primaryRow->end;
if (pStart == pEnd)
    {
    pStart -= 1;
    pEnd += 1;
    }
char *pChrom = primaryRow->chrom;

// NOTE(review): presumably these drop queued rows that lie before pStart and then pull
// rows from the internal source through pEnd -- both helpers are defined outside this view.
agTrimToStart(self, pChrom, pStart);
agFetchToEnd(self, pChrom, pStart, pEnd);

boolean reportRJFail = (retRJFilterFailed != NULL);
if (reportRJFail)
    *retRJFilterFailed = FALSE;

struct annoRow *qRow = self->qHead;
while (qRow != NULL)
    {
    boolean overlaps = (qRow->start < pEnd && qRow->end > pStart
                        && sameString(qRow->chrom, pChrom));
    if (overlaps)
        {
        int numCols = self->mySource->numCols;
        enum annoRowType rowType = self->mySource->rowType;
        struct annoRow *copy = annoRowClone(qRow, rowType, numCols, callerLm);
        slAddHead(&overlapList, copy);
        if (reportRJFail && qRow->rightJoinFail)
            {
            // The failing row is kept in the output, but nothing after it is examined.
            *retRJFilterFailed = TRUE;
            break;
            }
        }
    qRow = qRow->next;
    }
// Rows were pushed onto the head, so flip the list back into queue order.
slReverse(&overlapList);

// The presence or absence of overlap can itself fail the primary item: a right-join
// include filter or agoMustOverlap requires at least one overlapping row, while
// agoMustNotOverlap requires none.
if (retRJFilterFailed != NULL)
    {
    if (overlapList == NULL)
        {
        if (self->haveRJIncludeFilter || self->overlapRule == agoMustOverlap)
            *retRJFilterFailed = TRUE;
        }
    else if (self->overlapRule == agoMustNotOverlap)
        *retRJFilterFailed = TRUE;
    }
return overlapList;
}
static struct annoRow *nextQueuedRow(struct annoStreamDb *self, struct lm *callerLm) // Return the head of either bigItemQueue or smallItemQueue, depending on which has // the lower chromStart. { struct annoRow *row = NULL; if (self->bigItemQueue && annoRowCmp(&(self->bigItemQueue), &(self->smallItemQueue)) < 0) row = slPopHead(&(self->bigItemQueue)); else row = slPopHead(&(self->smallItemQueue)); if (self->bigItemQueue == NULL && self->smallItemQueue == NULL) // All done merge-sorting, just stream finest-bin items from here on out. self->mergeBins = FALSE; enum annoRowType rowType = self->streamer.rowType; int numCols = self->streamer.numCols; return annoRowClone(row, rowType, numCols, callerLm); }
static struct annoRow *mergeRow(struct annoStreamDb *self, struct annoRow *aRow, struct lm *callerLm) /* Compare head of bigItemQueue with (finest-bin) aRow; return the one with * lower chromStart and save the other for later. */ { struct annoRow *outRow = aRow; if (self->bigItemQueue == NULL) { // No coarse-bin items to merge-sort, just stream finest-bin items from here on out. resetMergeState(self); } else if (annoRowCmp(&(self->bigItemQueue), &aRow) < 0) { // Big item gets to go now, so save aside small item for next time. outRow = slPopHead(&(self->bigItemQueue)); slAddHead(&(self->smallItemQueue), aRow); } enum annoRowType rowType = self->streamer.rowType; int numCols = self->streamer.numCols; return annoRowClone(outRow, rowType, numCols, callerLm); }
static struct annoRow *nextRowMergeBins(struct annoStreamDb *self, char *minChrom, uint minEnd,
                                        struct lm *callerLm)
/* Return the next row, cloned via callerLm, while merge-sorting queued coarse-bin items
 * with finest-bin items fetched from mysql.  Only valid while self->mergeBins and
 * self->hasBin are both set (asserted).  minChrom and minEnd are passed through unchanged
 * to the row-fetching helpers. */
{
assert(self->mergeBins && self->hasBin);

// Merge-sorting is already under way: serve from the queues instead of pulling a new
// row from mysql.  This should keep smallItemQueue's depth at 1.
if (self->smallItemQueue != NULL)
    return nextQueuedRow(self, callerLm);

// Either the initial coarse-bin items still need collecting, or we are mid-merge.
boolean rightFail = FALSE;
char **row = nextRowFiltered(self, &rightFail, minChrom, minEnd);
if (row != NULL && !self->gotFinestBin)
    {
    // Just starting: queue up coarse-bin items, if any, until the first finest-bin item.
    row = getFinestBinItem(self, row, &rightFail, minChrom, minEnd);
    }

if (row == NULL)
    {
    // mysql has no more rows; hand out whatever is left at the head of bigItemQueue.
    // NOTE(review): when the queue is empty too, the result depends on how slPopHead and
    // annoRowClone treat an empty list / NULL row -- both are defined outside this view.
    struct annoRow *bigRow = slPopHead(&(self->bigItemQueue));
    int numCols = self->streamer.numCols;
    enum annoRowType rowType = self->streamer.rowType;
    return annoRowClone(bigRow, rowType, numCols, callerLm);
    }

// Merge-sort this finest-bin item against the coarse-bin queue.  The intermediate row
// lives in self->qLm; mergeRow returns a clone allocated via callerLm.
struct annoRow *fineRow = rowToAnnoRow(self, row, rightFail, self->qLm);
return mergeRow(self, fineRow, callerLm);
}