INLINE boolean findColumn(struct asColumn *columns, char *name, int *retIx, char **retName)
/* Look up name among columns using asColumnFindIx.
 * On a hit (index >= 0): store the index in *retIx, store a clone of name in
 * *retName, and return TRUE.
 * On a miss: store the negative index from asColumnFindIx in *retIx, store NULL
 * in *retName, and return FALSE.
 * retIx and retName are each optional; a NULL pointer is simply not written. */
{
int colIx = asColumnFindIx(columns, name);
boolean found = (colIx >= 0);
if (retIx != NULL)
    *retIx = colIx;
if (retName != NULL)
    *retName = found ? cloneString(name) : NULL;
return found;
}
struct annoRow *annoGratorGpVarIntegrate(struct annoGrator *gSelf,
					 struct annoStreamRows *primaryData,
					 boolean *retRJFilterFailed, struct lm *callerLm)
/* Integrate a variant (the first row of primaryData) with overlapping genePred rows,
 * producing as many output rows as are needed to capture all the changes.
 * Output rows are built by aggvGenRows / aggvIntergenicRow, which are given callerLm;
 * scratch data lives in self->lm, which is reset at the start of every call.
 * If retRJFilterFailed is non-NULL it may be set to TRUE when nothing is returned
 * and the overlap rule is agoMustOverlap.  Returns NULL when there is no output. */
{
struct annoGratorGpVar *self = (struct annoGratorGpVar *)gSelf;

// Start each call with a fresh scratch pool.
lmCleanup(&(self->lm));
self->lm = lmInit(0);

// Widen the primary row by GPRANGE on both sides (clamping start at 0) just for the
// duration of the inner integrate call, so upstream/downstream overlaps are found;
// then put the real coordinates back.
struct annoRow *primaryRow = primaryData->rowList;
int savedStart = primaryRow->start;
int savedEnd = primaryRow->end;
primaryRow->start = (primaryRow->start > GPRANGE) ? primaryRow->start - GPRANGE : 0;
primaryRow->end += GPRANGE;
struct annoRow *gpRow = annoGratorIntegrate(gSelf, primaryData, retRJFilterFailed, self->lm);
primaryRow->start = savedStart;
primaryRow->end = savedEnd;

// Lazily choose the row-to-variant converter.
if (self->variantFromRow == NULL)
    setVariantFromRow(self, primaryData);

// (Re)load chromosome sequence only when there is none cached or the chrom changed.
if (self->curChromSeq == NULL
    || differentString(self->curChromSeq->name, primaryRow->chrom))
    {
    dnaSeqFree(&self->curChromSeq);
    struct twoBitFile *tbf = self->grator.streamer.assembly->tbf;
    self->curChromSeq = twoBitReadSeqFragLower(tbf, primaryRow->chrom, 0, 0);
    }

// TODO Performance improvement: instead of creating the transcript sequence for each
// variant that intersects the transcript, cache transcript sequence; possibly
// an slPair with a concatenation of {chrom, txStart, txEnd, cdsStart, cdsEnd,
// exonStarts, exonEnds} as the name, and sequence as the val.  When something in
// the list is no longer in the list of rows from the internal annoGratorIntegrate call,
// drop it.
// BETTER YET: make a callback for gpFx to get CDS sequence only when it needs it.

// Reference allele comes from the restored (un-widened) coordinates.
char *refAllele = getGenomicSequence(self->curChromSeq->dna,
				     primaryRow->start, primaryRow->end, self->lm);
struct variant *variant = self->variantFromRow(self, primaryRow, refAllele);

if (gpRow == NULL)
    {
    // No genePreds means that the primary variant is intergenic.
    if (self->funcFilter != NULL && self->funcFilter->intergenic)
	return aggvIntergenicRow(self, variant, retRJFilterFailed, callerLm);
    if (retRJFilterFailed && self->gpVarOverlapRule == agoMustOverlap)
	*retRJFilterFailed = TRUE;
    return NULL;
    }

// The inner integrate call already reported a filter failure: nothing to emit.
if (retRJFilterFailed && *retRJFilterFailed)
    return NULL;

struct annoRow *outRows = NULL;
int hasFrames = (asColumnFindIx(gSelf->mySource->asObj->columnList, "exonFrames") >= 0);
while (gpRow != NULL)
    {
    char **gpWords = gpRow->data;
    // Work around genePredLoad's trashing its input: keep copies of the exonStarts
    // and exonEnds words and point the row at the copies after loading.
    char *exonStartsCopy = lmCloneString(self->lm, gpWords[8]);
    char *exonEndsCopy = lmCloneString(self->lm, gpWords[9]);
    struct genePred *gp;
    if (hasFrames)
	gp = genePredExtLoad(gpWords, GENEPREDX_NUM_COLS);
    else
	gp = genePredLoad(gpWords);
    gpWords[8] = exonStartsCopy;
    gpWords[9] = exonEndsCopy;

    struct annoRow *newRows = aggvGenRows(self, variant, gp, gpRow, callerLm);
    if (newRows != NULL)
	{
	// Prepend reversed; the single reversal after the loop restores overall order.
	slReverse(&newRows);
	outRows = slCat(newRows, outRows);
	}
    genePredFree(&gp);
    gpRow = gpRow->next;
    }
slReverse(&outRows);

// If all rows failed the filter, and we must overlap, set *retRJFilterFailed.
if (outRows == NULL && retRJFilterFailed && self->gpVarOverlapRule == agoMustOverlap)
    *retRJFilterFailed = TRUE;
return outRows;
}