struct annoRow *aggvIntergenicRow(struct annoGratorGpVar *self, struct variant *variant,
				  boolean *retRJFilterFailed, struct lm *callerLm)
/* Build an output row for a variant that is reported as intergenic: the genePred
 * columns are empty strings and the gpFx columns describe an effect whose soNumber is
 * intergenic_variant and whose detailType is none.
 * Scratch data (the word array and the gpFx) come from self->lm; the returned row is
 * built with callerLm.  retRJFilterFailed may be NULL, in which case the row is
 * created with its filter-failed flag off. */
{
struct annoGrator *grator = &(self->grator);
struct annoStreamer *streamer = &(grator->streamer);
int gpColCount = grator->mySource->numCols;

// One slot per output column: genePred columns first, gpFx columns after them.
char **wordsOut;
lmAllocArray(self->lm, wordsOut, streamer->numCols);
int col = 0;
while (col < gpColCount)
    wordsOut[col++] = "";

// Note: when the allele is all nucleotides, it is uppercased in place, i.e. the
// string returned by firstAltAllele() itself is modified.
char *altAllele = firstAltAllele(variant->alleles);
if (isAllNt(altAllele, strlen(altAllele)))
    touppers(altAllele);

struct gpFx *intergenicGpFx;
lmAllocVar(self->lm, intergenicGpFx);
intergenicGpFx->allele = altAllele;
intergenicGpFx->soNumber = intergenic_variant;
intergenicGpFx->detailType = none;
aggvStringifyGpFx(&wordsOut[gpColCount], intergenicGpFx, self->lm);

// Pass along the caller's filter-failure state, if the caller supplied one.
boolean rjFail = (retRJFilterFailed != NULL && *retRJFilterFailed != 0);
return annoRowFromStringArray(variant->chrom, variant->chromStart, variant->chromEnd,
			      rjFail, wordsOut, streamer->numCols, callerLm);
}
static struct gpFx *gpFxCheckUpDownstream(struct variant *variant, struct genePred *pred, struct lm *lm) // check to see if the variant is up or downstream { struct gpFx *effectsList = NULL; char *defaultAltAllele = firstAltAllele(variant->alleles); for(; variant ; variant = variant->next) { // Is this variant to the left or right of transcript? enum soTerm soNumber = 0; if (variant->chromStart < pred->txStart && variant->chromEnd > pred->txStart - GPRANGE) { if (*pred->strand == '+') soNumber = upstream_gene_variant; else soNumber = downstream_gene_variant; } else if (variant->chromEnd > pred->txEnd && variant->chromStart < pred->txEnd + GPRANGE) { if (*pred->strand == '+') soNumber = downstream_gene_variant; else soNumber = upstream_gene_variant; } if (soNumber != 0) { struct gpFx *effects = gpFxNew(defaultAltAllele, pred->name, soNumber, none, lm); effectsList = slCat(effectsList, effects); } } return effectsList; }
static struct gpFx *gpFxCheckTranscript(struct variant *variant, struct genePred *pred, struct dnaSeq *transcriptSeq, struct lm *lm) /* Check to see if variant overlaps an exon and/or intron of pred. */ { struct gpFx *effectsList = NULL; uint varStart = variant->chromStart, varEnd = variant->chromEnd; if (varStart < pred->txEnd && varEnd > pred->txStart) { boolean predIsNmd = genePredNmdTarget(pred); char *defaultAltAllele = firstAltAllele(variant->alleles); struct txCoords txc = getTxCoords(variant, pred); // Simplest case first: variant starts and ends in a single exon or single intron if (txc.startInExon == txc.endInExon && txc.startExonIx == txc.endExonIx) { int ix = txc.startExonIx; if (txc.startInExon) { // Exonic variant; figure out what kind: effectsList = slCat(effectsList, gpFxInExon(variant, &txc, ix, pred, predIsNmd, transcriptSeq, lm)); } else { // Intronic (and/or splice) variant: effectsList = slCat(effectsList, gpFxInIntron(variant, &txc, ix, pred, predIsNmd, defaultAltAllele, lm)); } } else { if (!predIsNmd) { // Let the user beware -- this variant is just complex (it overlaps at least one // exon/intron boundary). It could be an insertion, an MNV (multi-nt var) or // a deletion. struct gpFx *effect = gpFxNew(defaultAltAllele, pred->name, complex_transcript_variant, none, lm); effectsList = slCat(effectsList, effect); } // But we can at least say which introns and/or exons are affected. // Transform exon and intron numbers into ordered integers, -1 (upstream) through // 2*lastExonIx+1 (downstream), with even numbers being exonNum*2 and odd numbers // being intronNum*2 + 1: int vieStart = (2 * txc.startExonIx) + (txc.startInExon ? 0 : 1); int vieEnd = (2 * txc.endExonIx) + (txc.endInExon ? 
0 : 1); if (vieEnd < vieStart) { // Insertion at exon boundary (or bug) if (vieEnd != vieStart-1 || varStart != varEnd || txc.startInExon == txc.endInExon) errAbort("gpFxCheckTranscript: expecting insertion in pred=%s " "but varStart=%d, varEnd=%d, vieStart=%d, vieEnd=%d, " "starts in %son, ends in %son", pred->name, varStart, varEnd, vieStart, vieEnd, (txc.startInExon ? "ex" : "intr"), (txc.endInExon ? "ex" : "intr")); // Since it's an insertion, remember that end is before start. if (txc.startInExon) { // Intronic end precedes exonic start. Watch out for upstream as "intron[-1]": if (txc.endExonIx >= 0) effectsList = slCat(effectsList, gpFxInIntron(variant, &txc, txc.endExonIx, pred, predIsNmd, defaultAltAllele, lm)); effectsList = slCat(effectsList, gpFxInExon(variant, &txc, txc.startExonIx, pred, predIsNmd, transcriptSeq, lm)); } else { // Exonic end precedes intronic start. effectsList = slCat(effectsList, gpFxInExon(variant, &txc, txc.endExonIx, pred, predIsNmd, transcriptSeq, lm)); // Watch out for downstream as "intron[lastExonIx]" if (txc.startExonIx < txc.exonCount - 1) effectsList = slCat(effectsList, gpFxInIntron(variant, &txc, txc.startExonIx, pred, predIsNmd, defaultAltAllele, lm)); } } // end if variant is insertion else { // MNV or deletion - consider each overlapping intron and/or exon int ie; // Watch out for upstream (vieStart < 0) and downstream (vieEnd > last exon). for (ie = max(vieStart, 0); ie <= min(vieEnd, 2*(pred->exonCount-1)); ie++) { boolean isExon = (ie%2 == 0); int ix = ie / 2; if (isExon) effectsList = slCat(effectsList, gpFxInExon(variant, &txc, ix, pred, predIsNmd, transcriptSeq, lm)); else effectsList = slCat(effectsList, gpFxInIntron(variant, &txc, ix, pred, predIsNmd, defaultAltAllele, lm)); } // end for each (partial) exon/intron overlapping variant } // end if variant is MNV or deletion } // end if variant is complex } // end if variant overlaps pred return effectsList; }