struct perBaseWig* perBaseWigLoadContinue(struct metaBig* mb, char* chrom, int start, int end) /* load a perBaseWig from a wig/bigWig that's already open */ { if (mb->type != isaBigWig) return NULL; struct perBaseWig* list = NULL; struct lm* lm = lmInit(0); struct bbiInterval* intervals = bigWigIntervalQuery(mb->big.bbi, chrom, start, end, lm); struct bbiInterval *bbStart = intervals, *bbEnd; while (bbStart != NULL) { struct perBaseWig* region; struct bbiInterval* cur; int i = 0; bbEnd = bbStart; /* loop until discontinuity detected */ while ((bbEnd->next != NULL) && (bbEnd->end == bbEnd->next->start)) bbEnd = bbEnd->next; region = alloc_perBaseWig(chrom, bbStart->start, bbEnd->end); for (cur = bbStart; cur != bbEnd->next; cur = cur->next) { int j; for (j = cur->start; j < cur->end; j++) region->data[i++] = cur->val; } slAddHead(&list, region); bbStart = bbEnd->next; } lmCleanup(&lm); slReverse(&list); return list; }
struct perBaseWig* alloc_zero_perBaseWig(char* chrom, int start, int end)
/* Allocate a perBaseWig for chrom:start-end and set every data value to 0. */
/* This may be the best allocator to call before writing a wig. */
{
    struct perBaseWig* zeroed = alloc_perBaseWig(chrom, start, end);
    int idx = 0;
    while (idx < zeroed->len)
    {
        zeroed->data[idx] = 0;
        idx++;
    }
    return zeroed;
}
struct perBaseWig* alloc_fill_perBaseWig(char* chrom, int start, int end, double fill)
/* Allocate a perBaseWig for chrom:start-end and set every data value to fill. */
/* When fill is NaN the data is left exactly as alloc_perBaseWig() produced it. */
{
    struct perBaseWig* filled = alloc_perBaseWig(chrom, start, end);
    int idx;
    if (isnan(fill))
        return filled;
    for (idx = 0; idx < filled->len; ++idx)
        filled->data[idx] = fill;
    return filled;
}
struct perBaseWig* perBaseWigLoadHuge(struct metaBig* mb, struct bed* regions) /* Load a huge pbw, gaps removed */ { long supposed_size = 0; struct bed* bed; struct perBaseWig* big_pbw = NULL; int big_offset = 0; for (bed = regions; bed != NULL; bed = bed->next) supposed_size += bed->chromEnd - bed->chromStart; if (supposed_size > powl(2, 31)) errAbort("Requested regions sum to greater than 2^31 = 2,147,483,648 bases. The current implementation is restricted to fewer than this"); big_pbw = alloc_perBaseWig("various", 0, supposed_size); for (bed = regions; bed != NULL; bed = bed->next) { perBaseWigLoadHugeContinue(mb, big_pbw, &big_offset, bed); } big_pbw->total_coverage = (unsigned)big_offset; return big_pbw; }
struct perBaseWig* alloc_perBaseWig_matchingSequence(struct dnaSeq* seq, boolean skipN)
/* Allocate a perBaseWig to match the length of the dnaSeq. Optionally choose to skip */
/* N bases by making a subsections bed list that avoids them. One feature of this */
/* function is that the name of the sequence may be something like "chrom:start-end" */
/* in 0-based coordinates. If so, the chromStart/chromEnd are set to match the */
/* coordinates in the name. If not, the chromStart will be 0, and the chromEnd will */
/* be seq->size. The data array holds seq->size zeroed values. */
/* (this is used by symcurv) */
{
    struct perBaseWig* pbw;
    int size = seq->size;
    /* start from the documented defaults in case seq_name_disassemble() leaves */
    /* the coordinates untouched for a name without them -- TODO confirm */
    unsigned chromStart = 0;
    unsigned chromEnd = (unsigned)size;
    char* chrom = seq_name_disassemble(seq, &chromStart, &chromEnd);
    pbw = alloc_perBaseWig(chrom, (int)chromStart, (int)chromEnd);
    pbw->subsections = seq_subsection_list(seq, skipN);
    /* alloc_perBaseWig() has already attached a data array; release it before */
    /* installing the zeroed seq->size array so the first one is not leaked */
    freeMem(pbw->data);
    AllocArray(pbw->data, size);
    return pbw;
}
struct hash *genomePbw(struct hash *qSizes) /* make a parallel hash of pbws given the size hash, also keyed on chrom name */ { struct hash *pbwHash = newHash(10); struct hashEl *list = hashElListHash(qSizes); struct hashEl *el; const double na = NANUM; int i; for (el = list; el != NULL; el = el->next) { int size = ptToInt(el->val); struct perBaseWig *pbw = alloc_perBaseWig(el->name, 0, size); for (i = 0; i < pbw->len; i++) pbw->data[i] = na; pbw->name = cloneString(el->name); pbw->strand[0] = '+'; pbw->strand[1] = '\0'; hashAdd(pbwHash, el->name, pbw); } hashElFreeList(&list); return pbwHash; }