static struct bbiInterval *bigBedCoverageIntervals(struct bbiFile *bbi,
	char *chrom, bits32 start, bits32 end, struct lm *lm)
/* Query bbi for items overlapping chrom:start-end and return a list of
 * intervals whose val is the depth of coverage over that stretch.  Interval
 * ends are clipped to the query window and the intervals themselves are
 * allocated out of lm.  Returns NULL if the query finds nothing. */
{
struct bigBedInterval *hitList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm);
if (hitList == NULL)
    return NULL;

/* Pile every hit into a range tree that keeps track of depth. */
struct rbTree *depthTree = rangeTreeNew();
struct bigBedInterval *hit = hitList;
while (hit != NULL)
    {
    rangeTreeAddToCoverageDepth(depthTree, hit->start, hit->end);
    hit = hit->next;
    }

/* Walk the ranges in list order, appending one clipped interval per range. */
struct bbiInterval *outList = NULL;
struct bbiInterval **tailPt = &outList;
struct range *r;
for (r = rangeTreeList(depthTree); r != NULL; r = r->next)
    {
    struct bbiInterval *el;
    lmAllocVar(lm, el);

    bits32 lo = r->start;
    bits32 hi = r->end;
    el->start = (lo < start) ? start : lo;
    el->end = (hi > end) ? end : hi;
    el->val = ptToInt(r->val);

    *tailPt = el;
    tailPt = &el->next;
    }
*tailPt = NULL;

/* The ranges belong to the tree, so only release it once we are done with them. */
rangeTreeFree(&depthTree);
return outList;
}
void edwBamToWig(char *input, char *output) /* edwBamToWig - Convert a bam file to a wig file by measuring depth of coverage, optionally adjusting hit size to average for library.. */ { FILE *f = mustOpen(output, "w"); /* Open file and get header for it. */ samfile_t *sf = samopen(input, "rb", NULL); if (sf == NULL) errnoAbort("Couldn't open %s.\n", input); bam_header_t *head = sf->header; if (head == NULL) errAbort("Aborting ... Bad BAM header in file: %s", input); /* Scan through input populating genome range trees */ struct genomeRangeTree *grt = genomeRangeTreeNew(); bam1_t one = {}; for (;;) { /* Read next record. */ if (bam_read1(sf->x.bam, &one) < 0) break; if (one.core.tid >= 0 && one.core.n_cigar > 0) { char *chrom = head->target_name[one.core.tid]; int start = one.core.pos; int end = start + one.core.l_qseq; if (one.core.flag & BAM_FREVERSE) { start -= clPad; } else { end += clPad; } struct rbTree *rt = genomeRangeTreeFindOrAddRangeTree(grt,chrom); rangeTreeAddToCoverageDepth(rt, start, end); } } /* Convert genome range tree into output wig */ /* Get list of chromosomes. */ struct hashEl *hel, *helList = hashElListHash(grt->hash); for (hel = helList; hel != NULL; hel = hel->next) { char *chrom = hel->name; struct rbTree *rt = hel->val; struct range *range, *rangeList = rangeTreeList(rt); for (range = rangeList; range != NULL; range = range->next) { fprintf(f, "%s\t%d\t%d\t%d\n", chrom, range->start, range->end, ptToInt(range->val)); } } carefulClose(&f); }
struct rbTree *rangeTreeForBedChrom(struct lineFile *lf, char *chrom)
/* Read lines from bed file as long as they match chrom.  Return a rangeTree that
 * corresponds to the coverage.
 *
 * Reading stops at end of file or at the first line whose leading word is not
 * chrom; that line is pushed back onto lf so the caller sees it next.  Only the
 * first three whitespace-separated fields of each line are looked at, and a
 * line with fewer than three is reported as an error against lf rather than
 * being parsed from uninitialized pointers. */
{
struct rbTree *tree = rangeTreeNew();
char *line;
while (lineFileNextReal(lf, &line))
    {
    if (!startsWithWord(chrom, line))
        {
        lineFileReuse(lf);
        break;
        }
    char *row[3];
    int wordCount = chopLine(line, row);
    /* chopLine fills at most 3 slots, so anything other than 3 means a short line. */
    lineFileExpectWords(lf, 3, wordCount);
    unsigned start = sqlUnsigned(row[1]);
    unsigned end = sqlUnsigned(row[2]);
    rangeTreeAddToCoverageDepth(tree, start, end);
    }
return tree;
}
struct peakCluster *peakClusterItems(struct lm *lm, struct peakItem *itemList, double forceJoinScore, double weakLevel) /* Convert a list of items to a list of clusters of items. This may break up clusters that * have weakly linked parts. [ ] AAAAAAAAAAAAAAAAAA BBBBBB DDDDDD CCCC EEEE gets tranformed into [ ] [ ] AAAAAAAAAAAAAAAAAA BBBBBB DDDDDD CCCC EEEE The strategy is to build a rangeTree of coverage, which might look something like so: 123333211123333211 then define cluster ends that exceed the minimum limit, which is either weakLevel (usually 10%) of the highest or forceJoinScore if weakLevel times the highest is more than forceJoinScore. This will go to something like so: [---] [----] Finally the items that are overlapping a cluster are assigned to it. Note that this may mean that an item may be in multiple clusters. [ABC] [ ADE] */ { int easyMax = round(1.0/weakLevel); int itemCount = slCount(itemList); struct peakCluster *clusterList = NULL; if (itemCount < easyMax) { struct peakItem *item = itemList; int chromStart = item->chromStart; int chromEnd = item->chromEnd; for (item = item->next; item != NULL; item = item->next) { if (item->chromStart < chromStart) chromStart = item->chromStart; if (item->chromEnd > chromEnd) chromEnd = item->chromEnd; } addCluster(lm, itemList, chromStart, chromEnd, &clusterList); } else { /* Make up coverage tree. */ struct rbTree *covTree = rangeTreeNew(); struct peakItem *item; for (item = itemList; item != NULL; item = item->next) rangeTreeAddToCoverageDepth(covTree, item->chromStart, item->chromEnd); struct range *range, *rangeList = rangeTreeList(covTree); /* Figure out maximum coverage. */ int maxCov = 0; for (range = rangeList; range != NULL; range = range->next) { int cov = ptToInt(range->val); if (cov > maxCov) maxCov = cov; } /* Figure coverage threshold. 
*/ int threshold = round(maxCov * weakLevel); if (threshold > forceJoinScore-1) threshold = forceJoinScore-1; /* Loop through emitting sections over threshold as clusters */ boolean inRange = FALSE; boolean start = 0, end = 0; for (range = rangeList; range != NULL; range = range->next) { int cov = ptToInt(range->val); if (cov > threshold) { if (inRange) end = range->end; else { inRange = TRUE; start = range->start; end = range->end; } } else { if (inRange) { addCluster(lm, itemList, start, end, &clusterList); inRange = FALSE; } } } if (inRange) addCluster(lm, itemList, start, end, &clusterList); } slReverse(&clusterList); return clusterList; }