int bamAddOneSamAlignment(const bam1_t *bam, void *data, bam_hdr_t *header)
/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam
 * into a samAlignment.
 *   bam    - the alignment record to translate
 *   data   - really a struct bamToSamHelper *: supplies the local memory pool (lm),
 *            a scratch dyString (dy), an optional fixed chrom name, and the output list
 *   header - BAM header; target_name[] is used to look up reference names by id
 * The new samAlignment is allocated out of helper->lm and added to the head of
 * helper->samList.  Always returns 0. */
{
struct bamToSamHelper *helper = (struct bamToSamHelper *)data;
struct lm *lm = helper->lm;
struct samAlignment *sam;
lmAllocVar(lm, sam);
const bam1_core_t *core = &bam->core;
struct dyString *dy = helper->dy;

sam->qName = lmCloneString(lm, bam1_qname(bam));
sam->flag = core->flag;

/* Reference name: prefer the caller-supplied chrom, otherwise look it up in the header. */
if (helper->chrom != NULL)
    sam->rName = helper->chrom;
else if (core->tid >= 0)
    sam->rName = lmCloneString(lm, header->target_name[core->tid]);
else
    // A negative tid means the record has no reference sequence; indexing
    // target_name[] with it would read out of bounds.  Use "*" as for rNext below.
    sam->rName = "*";

sam->pos = core->pos + 1;
sam->mapQ = core->qual;

/* CIGAR is unpacked into the scratch dyString, then copied into local memory. */
dyStringClear(dy);
bamUnpackCigar(bam, dy);
sam->cigar = lmCloneStringZ(lm, dy->string, dy->stringSize);

/* Mate reference name: "=" when same as this read's reference, "*" when absent. */
if (core->mtid >= 0)
    {
    if (core->tid == core->mtid)
        sam->rNext = "=";
    else
        sam->rNext = lmCloneString(lm, header->target_name[core->mtid]);
    }
else
    sam->rNext = "*";
sam->pNext = core->mpos + 1;
sam->tLen = core->isize;

sam->seq = lmAlloc(lm, core->l_qseq + 1);
bamUnpackQuerySequence(bam, FALSE, sam->seq);

/* Qualities: if every byte is -1 (0xff) report "*", otherwise copy them and
 * add 33 to each byte. */
char *bamQual = (char *)bam1_qual(bam);
if (isAllSameChar(bamQual, core->l_qseq, -1))
    sam->qual = "*";
else
    {
    sam->qual = lmCloneStringZ(lm, bamQual, core->l_qseq);
    addToChars(sam->qual, core->l_qseq, 33);
    }

/* Optional tag:type:value fields, again via the scratch dyString. */
dyStringClear(dy);
bamUnpackAux(bam, dy);
sam->tagTypeVals = lmCloneStringZ(lm, dy->string, dy->stringSize);

slAddHead(&helper->samList, sam);
return 0;
}
char *lmCloneString(struct lm *lm, char *string)
/* Return a copy of string made via lmCloneStringZ() using lm, or NULL
 * when string itself is NULL. */
{
if (string != NULL)
    return lmCloneStringZ(lm, string, strlen(string));
return NULL;
}
char *lmCloneFirstWord(struct lm *lm, char *line)
/* Clone the first word in line using lm.  Returns NULL if skipLeadingSpaces()
 * yields NULL for line. */
{
char *wordStart = skipLeadingSpaces(line);
if (wordStart == NULL)
    return NULL;

char *wordEnd = skipToSpaces(wordStart);
if (wordEnd != NULL)
    // Word is followed by more text: copy only up to wordEnd.
    return lmCloneStringZ(lm, wordStart, wordEnd - wordStart);

// skipToSpaces() gave no end position: clone everything from wordStart on.
return lmCloneString(lm, wordStart);
}
static inline char *vcfFileCloneStrZ(struct vcfFile *vcff, char *str, size_t size)
/* Copy size characters of str via lmCloneStringZ(), using the local memory
 * obtained from vcfFileLm(vcff). */
{
struct lm *localMem = vcfFileLm(vcff);
return lmCloneStringZ(localMem, str, size);
}
static struct gpFx *gpFxChangedCds(struct allele *allele, struct genePred *pred,
				   struct txCoords *txc, int exonIx, boolean predIsNmd,
				   struct dnaSeq *transcriptSequence, struct lm *lm)
/* Work out what the allele does to the coding sequence of transcript pred.
 * Returns a gpFx (created by gpFxNew with lm) whose codingChange details hold the
 * cDNA/CDS/peptide positions, the exon number, and the old and new codons and
 * amino acids.  If the peptide position lies at or beyond the end of the translated
 * original CDS, the effect is returned with only the positions and exon number set. */
{
// Original coding DNA; getCodingSequence reports whether it had to pad for frame.
boolean paddedForFrame = FALSE;
char *refCds = getCodingSequence(pred, transcriptSequence->dna, &paddedForFrame, lm);

int startInCds = txc->startInCds;
int endInCds = txc->endInCds;
if (paddedForFrame)
    {
    // The annotated CDS exons were not all in frame, so getCodingSequence added 'N's
    // and txc's CDS offsets must be corrected before use.
    startInCds = getCorrectedCdsOffset(pred, txc->startInCds);
    endInCds = getCorrectedCdsOffset(pred, txc->endInCds);
    }

// Translate the original, then build and translate the variant coding sequence.
int refCdsLen = strlen(refCds);
char *refPep = lmSimpleTranslate(lm, refCds, refCdsLen);
int cdsBasesAdded = 0;
char *altCds = gpFxModifyCodingSequence(refCds, pred, startInCds, endInCds,
					allele, &cdsBasesAdded, lm);
int altCdsLen = strlen(altCds);
char *altPep = lmSimpleTranslate(lm, altCds, altCdsLen);

// Allocate the effect structure; soNumber and the remaining details are filled in below.
struct gpFx *effect = gpFxNew(allele->sequence, pred->name, coding_sequence_variant,
			      codingChange, lm);
struct codingChange *cc = &effect->details.codingChange;
cc->cDnaPosition = txc->startInCdna;
cc->cdsPosition = startInCds;
cc->exonNumber = exonIx;

int pepPos = startInCds / 3;
// At this point we don't use genePredExt's exonFrames field -- we just assume that
// the CDS starts in frame.  That's not always the case (e.g. ensGene has some CDSs
// that begin out of frame), so the translated original may be cut short by a stop
// codon in the wrong frame.  Bail out if pepPos falls past its end.
if (strlen(refPep) <= pepPos)
    return effect;
cc->pepPosition = pepPos;

int codonOffset = pepPos*3;
if (cdsBasesAdded % 3 != 0)
    {
    // Frameshift -- who knows how many codons we can reliably predict, so keep
    // everything from the affected codon/peptide position to the end of each string.
    cc->codonOld = lmCloneString(lm, refCds + codonOffset);
    cc->codonNew = lmCloneString(lm, altCds + codonOffset);
    cc->aaOld = lmCloneString(lm, refPep + pepPos);
    cc->aaNew = lmCloneString(lm, altPep + pepPos);
    }
else
    {
    int refCodonCount, altCodonCount;
    if (cdsBasesAdded > 0)
	{
	// In-frame insertion: more new codons than old.
	refCodonCount = ((cc->cdsPosition % 3) == 0) ? 0 : 1;
	altCodonCount = refCodonCount + (cdsBasesAdded / 3);
	}
    else if (cdsBasesAdded < 0)
	{
	// In-frame deletion: more old codons than new.
	altCodonCount = ((cc->cdsPosition % 3) == 0) ? 0 : 1;
	refCodonCount = altCodonCount + (-cdsBasesAdded / 3);
	}
    else
	{
	// Common case: substitution, same number of old/new codons/peps.
	refCodonCount = altCodonCount = (1 + allele->length / 3);
	}
    cc->codonOld = lmCloneStringZ(lm, refCds + codonOffset, refCodonCount*3);
    cc->codonNew = lmCloneStringZ(lm, altCds + codonOffset, altCodonCount*3);
    cc->aaOld = lmCloneStringZ(lm, refPep + pepPos, refCodonCount);
    cc->aaNew = lmCloneStringZ(lm, altPep + pepPos, altCodonCount);
    }

if (!predIsNmd)
    setSpecificCodingSoTerm(effect, refPep, altPep, cdsBasesAdded);
else
    // This transcript is already subject to nonsense-mediated decay, so the effect
    // is probably not a big deal:
    effect->soNumber = NMD_transcript_variant;
return effect;
}
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
	bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Fetch the bigBed records on chrom that overlap start..end.  The returned list
 * is allocated out of lm.  maxItems caps the number of items returned; pass 0
 * to get all of them. */
{
struct bigBedInterval *results = NULL;
int itemCount = 0;
boolean limitHit = FALSE;	/* Set once itemCount has gone past a positive maxItems. */

bbiAttachUnzoomedCir(bbi);
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
	chrom, start, end, &chromId);
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;

/* Only allocate an uncompression buffer when the file asks for one. */
char *uncompressBuf = NULL;
if (bbi->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bbi->uncompressBufSize);

char *mergedBuf = NULL;
struct fileOffsetSize *block = blockList;
while (block != NULL)
    {
    /* Find the run of contiguous blocks starting here and read it in one go. */
    struct fileOffsetSize *beforeGap, *afterGap;
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);

    /* Walk the individual blocks inside the merged section. */
    char *blockBuf = mergedBuf;
    for ( ; block != afterGap; block = block->next)
	{
	/* Point recPt/recEnd at this block's records, uncompressing if necessary. */
	char *recPt, *recEnd;
	if (uncompressBuf == NULL)
	    {
	    recPt = blockBuf;
	    recEnd = recPt + block->size;
	    }
	else
	    {
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf,
		bbi->uncompressBufSize);
	    recPt = uncompressBuf;
	    recEnd = recPt + uncSize;
	    }

	while (recPt < recEnd)
	    {
	    /* Each record: chromId, start, end, then a zero-terminated string
	     * holding the rest of the bed fields. */
	    bits32 chr = memReadBits32(&recPt, isSwapped);
	    bits32 s = memReadBits32(&recPt, isSwapped);
	    bits32 e = memReadBits32(&recPt, isSwapped);
	    int restLen = strlen(recPt);

	    /* Keep the record only if it really overlaps the query range. */
	    if (chr == chromId && s < end && e > start)
		{
		++itemCount;
		if (maxItems > 0 && itemCount > maxItems)
		    {
		    limitHit = TRUE;
		    break;
		    }
		struct bigBedInterval *el;
		lmAllocVar(lm, el);
		el->start = s;
		el->end = e;
		if (restLen > 0)
		    el->rest = lmCloneStringZ(lm, recPt, restLen);
		el->chromId = chromId;
		slAddHead(&results, el);
		}

	    /* Step over the rest-of-fields string and its terminator. */
	    recPt += restLen + 1;
	    }
	if (limitHit)
	    break;
	blockBuf += block->size;
	}
    if (limitHit)
	break;
    freez(&mergedBuf);
    }

/* Leaving early because of maxItems skips the in-loop free, so release it here. */
freez(&mergedBuf);
freeMem(uncompressBuf);
slFreeList(&blockList);
slReverse(&results);
return results;
}
char *getGenomicSequence(char *chromSeq, uint start, uint end, struct lm *lm)
/* Return the genomic sequence from start to end: (end - start) characters of
 * chromSeq beginning at offset start, cloned via lmCloneStringZ() using lm. */
{
char *regionStart = chromSeq + start;
uint regionSize = end - start;
return lmCloneStringZ(lm, regionStart, regionSize);
}
static char *vcfFileCloneStrZ(struct vcfFile *vcff, char *str, size_t size)
/* Copy size characters of str via lmCloneStringZ(), using the local memory
 * found at vcff->pool->lm. */
{
struct lm *localMem = vcff->pool->lm;
return lmCloneStringZ(localMem, str, size);
}