Пример #1
0
INLINE boolean findColumn(struct asColumn *columns, char *name, int *retIx, char **retName)
/* Look up name among columns using asColumnFindIx.
 * Hit (index >= 0): *retIx gets the index, *retName gets a cloneString copy of name,
 *   and TRUE is returned.
 * Miss: *retIx gets the negative value reported by asColumnFindIx (presumably -1 --
 *   confirm against asColumnFindIx), *retName gets NULL, and FALSE is returned.
 * Either retIx or retName may be NULL when the caller does not want that output. */
{
int colIx = asColumnFindIx(columns, name);
boolean found = (colIx >= 0);
if (retIx != NULL)
    *retIx = colIx;
if (retName != NULL)
    *retName = found ? cloneString(name) : NULL;
return found;
}
Пример #2
0
struct annoRow *annoGratorGpVarIntegrate(struct annoGrator *gSelf,
					 struct annoStreamRows *primaryData,
					 boolean *retRJFilterFailed, struct lm *callerLm)
// Integrate the primary variant row with every genePred row found near it and
// return as many output rows as aggvGenRows produces for those pairs.
// Rows handed back to the caller are built with callerLm; intermediate data (the
// genePred rows from the inner annoGratorIntegrate call, the reference allele,
// saved exon strings) come from self->lm, which is torn down and recreated on
// every call.
// Returns NULL when there is nothing to report; in that case *retRJFilterFailed
// (if the pointer is non-NULL) may have been set to TRUE.
{
struct annoGratorGpVar *self = (struct annoGratorGpVar *)gSelf;

// Fresh scratch memory for this call.
lmCleanup(&(self->lm));
self->lm = lmInit(0);

// Widen the primary row by GPRANGE on both sides (start clamped at 0) just long
// enough for the inner overlap query to pick up upstream/downstream genePreds,
// then put the original coordinates back.
struct annoRow *primaryRow = primaryData->rowList;
int savedStart = primaryRow->start;
int savedEnd = primaryRow->end;
if (primaryRow->start > GPRANGE)
    primaryRow->start -= GPRANGE;
else
    primaryRow->start = 0;
primaryRow->end += GPRANGE;
struct annoRow *gpRow = annoGratorIntegrate(gSelf, primaryData, retRJFilterFailed, self->lm);
primaryRow->start = savedStart;
primaryRow->end = savedEnd;

// Lazily pick the row-to-variant converter.
if (self->variantFromRow == NULL)
    setVariantFromRow(self, primaryData);

// (Re)load chromosome sequence when none is cached or the chromosome has changed.
boolean needChromSeq = (self->curChromSeq == NULL
			|| differentString(self->curChromSeq->name, primaryRow->chrom));
if (needChromSeq)
    {
    dnaSeqFree(&self->curChromSeq);
    self->curChromSeq = twoBitReadSeqFragLower(self->grator.streamer.assembly->tbf,
					       primaryRow->chrom, 0, 0);
    }

// TODO Performance improvement: rather than rebuilding transcript sequence for every
// variant that intersects a transcript, cache it -- possibly as an slPair whose name
// is a concatenation of {chrom, txStart, txEnd, cdsStart, cdsEnd, exonStarts, exonEnds}
// and whose val is the sequence.  Drop a cached entry once it no longer appears in
// the rows returned by the internal annoGratorIntegrate call.
// BETTER YET: make a callback for gpFx to get CDS sequence only when it needs it.
char *refAllele = getGenomicSequence(self->curChromSeq->dna, primaryRow->start,
				     primaryRow->end, self->lm);
struct variant *variant = self->variantFromRow(self, primaryRow, refAllele);

if (gpRow == NULL)
    {
    // No genePreds at all: the primary variant is intergenic.
    if (self->funcFilter != NULL && self->funcFilter->intergenic)
	return aggvIntergenicRow(self, variant, retRJFilterFailed, callerLm);
    if (retRJFilterFailed != NULL && self->gpVarOverlapRule == agoMustOverlap)
	*retRJFilterFailed = TRUE;
    return NULL;
    }
if (retRJFilterFailed != NULL && *retRJFilterFailed)
    return NULL;

int hasFrames = (asColumnFindIx(gSelf->mySource->asObj->columnList, "exonFrames") >= 0);
struct annoRow *outRows = NULL;

while (gpRow != NULL)
    {
    char **words = gpRow->data;

    // genePredLoad trashes its input, so keep copies of the exon start/end
    // strings and point the row back at them after loading.
    char *exonStartsCopy = lmCloneString(self->lm, words[8]);
    char *exonEndsCopy = lmCloneString(self->lm, words[9]);
    struct genePred *gp;
    if (hasFrames)
	gp = genePredExtLoad(words, GENEPREDX_NUM_COLS);
    else
	gp = genePredLoad(words);
    words[8] = exonStartsCopy;
    words[9] = exonEndsCopy;

    // Prepend this genePred's rows (reversed) to the accumulated list; the
    // reversal after the loop finishes the job.
    struct annoRow *gpOutRows = aggvGenRows(self, variant, gp, gpRow, callerLm);
    if (gpOutRows != NULL)
	{
	slReverse(&gpOutRows);
	outRows = slCat(gpOutRows, outRows);
	}
    genePredFree(&gp);
    gpRow = gpRow->next;
    }
slReverse(&outRows);

// Nothing survived and overlap is mandatory: report the filter failure.
if (outRows == NULL && retRJFilterFailed != NULL && self->gpVarOverlapRule == agoMustOverlap)
    *retRJFilterFailed = TRUE;
return outRows;
}